]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
working on current change
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12
13 //**********************************************************************************************************************
14 vector<string> TrimSeqsCommand::setParameters(){        
15         try {
16                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
17                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
18                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
19                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
20                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
21                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
22                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
23                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
24                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
25                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
26                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
27                 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
28                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
29                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
30                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
31                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
32                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
33                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
34                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
35                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
36                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
37                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
38                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
39                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
40                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
41                         
42                 vector<string> myArray;
43                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
44                 return myArray;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 string TrimSeqsCommand::getHelpString(){        
53         try {
54                 string helpString = "";
55                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
56                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
57                 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
58                 helpString += "The fasta parameter is required.\n";
59                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
60                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
61                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
62                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
63                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
64                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
65                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
66                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n";
67                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
68                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
69                 helpString += "The qfile parameter allows you to provide a quality file.\n";
70                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
71                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
72                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
73                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
74                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
75                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
76                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
77                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
78                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
79                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
80                 helpString += "The trim.seqs command should be in the following format: \n";
81                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
82                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
83                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
84                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
85                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
86                 return helpString;
87         }
88         catch(exception& e) {
89                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
90                 exit(1);
91         }
92 }
93
94
95 //**********************************************************************************************************************
96
97 TrimSeqsCommand::TrimSeqsCommand(){     
98         try {
99                 abort = true; calledHelp = true; 
100                 setParameters();
101                 vector<string> tempOutNames;
102                 outputTypes["fasta"] = tempOutNames;
103                 outputTypes["qfile"] = tempOutNames;
104                 outputTypes["group"] = tempOutNames;
105                 outputTypes["name"] = tempOutNames;
106         }
107         catch(exception& e) {
108                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
109                 exit(1);
110         }
111 }
112 //***************************************************************************************************************
113
114 TrimSeqsCommand::TrimSeqsCommand(string option)  {
115         try {
116                 
117                 abort = false; calledHelp = false;   
118                 comboStarts = 0;
119                 
120                 //allow user to run help
121                 if(option == "help") { help(); abort = true; calledHelp = true; }
122                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
123                 
124                 else {
125                         vector<string> myArray = setParameters();
126                         
127                         OptionParser parser(option);
128                         map<string,string> parameters = parser.getParameters();
129                         
130                         ValidParameters validParameter;
131                         map<string,string>::iterator it;
132                         
133                         //check to make sure all parameters are valid for command
134                         for (it = parameters.begin(); it != parameters.end(); it++) { 
135                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
136                         }
137                         
138                         //initialize outputTypes
139                         vector<string> tempOutNames;
140                         outputTypes["fasta"] = tempOutNames;
141                         outputTypes["qfile"] = tempOutNames;
142                         outputTypes["group"] = tempOutNames;
143                         outputTypes["name"] = tempOutNames;
144                         
145                         //if the user changes the input directory command factory will send this info to us in the output parameter 
146                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
147                         if (inputDir == "not found"){   inputDir = "";          }
148                         else {
149                                 string path;
150                                 it = parameters.find("fasta");
151                                 //user has given a template file
152                                 if(it != parameters.end()){ 
153                                         path = m->hasPath(it->second);
154                                         //if the user has not given a path then, add inputdir. else leave path alone.
155                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
156                                 }
157                                 
158                                 it = parameters.find("oligos");
159                                 //user has given a template file
160                                 if(it != parameters.end()){ 
161                                         path = m->hasPath(it->second);
162                                         //if the user has not given a path then, add inputdir. else leave path alone.
163                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
164                                 }
165                                 
166                                 it = parameters.find("qfile");
167                                 //user has given a template file
168                                 if(it != parameters.end()){ 
169                                         path = m->hasPath(it->second);
170                                         //if the user has not given a path then, add inputdir. else leave path alone.
171                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
172                                 }
173                                 
174                                 it = parameters.find("name");
175                                 //user has given a template file
176                                 if(it != parameters.end()){ 
177                                         path = m->hasPath(it->second);
178                                         //if the user has not given a path then, add inputdir. else leave path alone.
179                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
180                                 }
181                                 
182                         }
183
184                         
185                         //check for required parameters
186                         fastaFile = validParameter.validFile(parameters, "fasta", true);
187                         if (fastaFile == "not found") {                                 
188                                 fastaFile = m->getFastaFile(); 
189                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
190                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
191                         }else if (fastaFile == "not open") { abort = true; }    
192                         
193                         //if the user changes the output directory command factory will send this info to us in the output parameter 
194                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
195                                 outputDir = ""; 
196                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
197                         }
198                 
199                         
200                         //check for optional parameter and set defaults
201                         // ...at some point should added some additional type checking...
202                         string temp;
203                         temp = validParameter.validFile(parameters, "flip", false);
204                         if (temp == "not found"){       flip = 0;       }
205                         else if(m->isTrue(temp))        {       flip = 1;       }
206                 
207                         temp = validParameter.validFile(parameters, "oligos", true);
208                         if (temp == "not found"){       oligoFile = "";         }
209                         else if(temp == "not open"){    abort = true;   } 
210                         else                                    {       oligoFile = temp;               }
211                         
212                         
213                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
214                         convert(temp, maxAmbig);  
215
216                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
217                         convert(temp, maxHomoP);  
218
219                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
220                         convert(temp, minLength); 
221                         
222                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
223                         convert(temp, maxLength);
224                         
225                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
226                         convert(temp, bdiffs);
227                         
228                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
229                         convert(temp, pdiffs);
230                         
231                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
232                         convert(temp, tdiffs);
233                         
234                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
235                         
236                         temp = validParameter.validFile(parameters, "qfile", true);     
237                         if (temp == "not found")        {       qFileName = "";         }
238                         else if(temp == "not open")     {       abort = true;           }
239                         else                                            {       qFileName = temp;       }
240                         
241                         temp = validParameter.validFile(parameters, "name", true);      
242                         if (temp == "not found")        {       nameFile = "";          }
243                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
244                         else                                            {       nameFile = temp;        }
245                         
246                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
247                         convert(temp, qThreshold);
248                         
249                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
250                         qtrim = m->isTrue(temp);
251
252                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
253                         convert(temp, qRollAverage);
254
255                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
256                         convert(temp, qWindowAverage);
257
258                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
259                         convert(temp, qWindowSize);
260
261                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
262                         convert(temp, qWindowStep);
263
264                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
265                         convert(temp, qAverage);
266
267                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
268                         convert(temp, keepFirst);
269
270                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
271                         convert(temp, removeLast);
272                         
273                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
274                         allFiles = m->isTrue(temp);
275                         
276                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
277                         m->setProcessors(temp);
278                         convert(temp, processors); 
279                         
280                         
281                         if(allFiles && (oligoFile == "")){
282                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
283                         }
284                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
285                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
286                                 qAverage=0;
287                                 qThreshold=0;
288                         }
289                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
290                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
291                                 abort = true;
292                         }
293                 }
294
295         }
296         catch(exception& e) {
297                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
298                 exit(1);
299         }
300 }
301 //***************************************************************************************************************
302
303 int TrimSeqsCommand::execute(){
304         try{
305         
306                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
307                 
308                 numFPrimers = 0;  //this needs to be initialized
309                 numRPrimers = 0;
310                 vector<vector<string> > fastaFileNames;
311                 vector<vector<string> > qualFileNames;
312                 vector<vector<string> > nameFileNames;
313                 
314                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
315                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
316                 
317                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
318                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
319                 
320                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
321                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
322                 
323                 if (qFileName != "") {
324                         outputNames.push_back(trimQualFile);
325                         outputNames.push_back(scrapQualFile);
326                         outputTypes["qfile"].push_back(trimQualFile);
327                         outputTypes["qfile"].push_back(scrapQualFile); 
328                 }
329                 
330                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
331                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
332                 
333                 if (nameFile != "") {
334                         m->readNames(nameFile, nameMap);
335                         outputNames.push_back(trimNameFile);
336                         outputNames.push_back(scrapNameFile);
337                         outputTypes["name"].push_back(trimNameFile);
338                         outputTypes["name"].push_back(scrapNameFile); 
339                 }
340                 
341                 if (m->control_pressed) { return 0; }
342                 
343                 string outputGroupFileName;
344                 if(oligoFile != ""){
345                         outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
346                         outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
347                         getOligos(fastaFileNames, qualFileNames, nameFileNames);
348                 }
349                 
350                 vector<unsigned long int> fastaFilePos;
351                 vector<unsigned long int> qFilePos;
352                 
353                 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
354                 
355                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
356                         lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
357                         if (qFileName != "") {  qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)]));  }
358                 }       
359                 if(qFileName == "")     {       qLines = lines; } //files with duds
360                 
361                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
362                                 if(processors == 1){
363                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
364                                 }else{
365                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
366                                 }       
367                 #else
368                                 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
369                 #endif
370                 
371                 if (m->control_pressed) {  return 0; }                  
372         
373                 if(allFiles){
374                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
375                         map<string, string>::iterator it;
376                         set<string> namesToRemove;
377                         for(int i=0;i<fastaFileNames.size();i++){
378                                 for(int j=0;j<fastaFileNames[0].size();j++){
379                                         if (fastaFileNames[i][j] != "") {
380                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
381                                                         if(m->isBlank(fastaFileNames[i][j])){
382                                                                 remove(fastaFileNames[i][j].c_str());
383                                                                 namesToRemove.insert(fastaFileNames[i][j]);
384                                                         
385                                                                 if(qFileName != ""){
386                                                                         remove(qualFileNames[i][j].c_str());
387                                                                         namesToRemove.insert(qualFileNames[i][j]);
388                                                                 }
389                                                                 
390                                                                 if(nameFile != ""){
391                                                                         remove(nameFileNames[i][j].c_str());
392                                                                         namesToRemove.insert(nameFileNames[i][j]);
393                                                                 }
394                                                         }else{  
395                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
396                                                                 if (it == uniqueFastaNames.end()) {     
397                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
398                                                                 }       
399                                                         }
400                                                 }
401                                         }
402                                 }
403                         }
404                         
405                         //remove names for outputFileNames, just cleans up the output
406                         vector<string> outputNames2;
407                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
408                         outputNames = outputNames2;
409                         
410                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
411                                 ifstream in;
412                                 m->openInputFile(it->first, in);
413                                 
414                                 ofstream out;
415                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
416                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
417                                 m->openOutputFile(thisGroupName, out);
418                                 
419                                 while (!in.eof()){
420                                         if (m->control_pressed) { break; }
421                                         
422                                         Sequence currSeq(in); m->gobble(in);
423                                         out << currSeq.getName() << '\t' << it->second << endl;
424                                 }
425                                 in.close();
426                                 out.close();
427                         }
428                 }
429                 
430                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0;     }
431
432                 //output group counts
433                 m->mothurOutEndLine();
434                 int total = 0;
435                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
436                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
437                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
438                 }
439                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
440                 
441                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0;     }
442
443                 //set fasta file as new current fastafile
444                 string current = "";
445                 itTypes = outputTypes.find("fasta");
446                 if (itTypes != outputTypes.end()) {
447                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
448                 }
449                 
450                 itTypes = outputTypes.find("name");
451                 if (itTypes != outputTypes.end()) {
452                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
453                 }
454                 
455                 itTypes = outputTypes.find("qfile");
456                 if (itTypes != outputTypes.end()) {
457                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
458                 }
459                 
460                 itTypes = outputTypes.find("group");
461                 if (itTypes != outputTypes.end()) {
462                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
463                 }
464
465                 m->mothurOutEndLine();
466                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
467                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
468                 m->mothurOutEndLine();
469                 
470                 return 0;       
471                         
472         }
473         catch(exception& e) {
474                 m->errorOut(e, "TrimSeqsCommand", "execute");
475                 exit(1);
476         }
477 }
478                 
479 /**************************************************************************************/
480
481 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair* line, linePair* qline) {    
482                 
483         try {
484                 
485                 ofstream trimFASTAFile;
486                 m->openOutputFile(trimFileName, trimFASTAFile);
487                 
488                 ofstream scrapFASTAFile;
489                 m->openOutputFile(scrapFileName, scrapFASTAFile);
490                 
491                 ofstream trimQualFile;
492                 ofstream scrapQualFile;
493                 if(qFileName != ""){
494                         m->openOutputFile(trimQFileName, trimQualFile);
495                         m->openOutputFile(scrapQFileName, scrapQualFile);
496                 }
497                 
498                 ofstream trimNameFile;
499                 ofstream scrapNameFile;
500                 if(nameFile != ""){
501                         m->openOutputFile(trimNFileName, trimNameFile);
502                         m->openOutputFile(scrapNFileName, scrapNameFile);
503                 }
504                 
505                 
506                 ofstream outGroupsFile;
507                 if (oligoFile != ""){   m->openOutputFile(groupFileName, outGroupsFile);   }
508                 if(allFiles){
509                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
510                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
511                                         if (fastaFileNames[i][j] != "") {
512                                                 ofstream temp;
513                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
514                                                 if(qFileName != ""){
515                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
516                                                 }
517                                                 
518                                                 if(nameFile != ""){
519                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
520                                                 }
521                                         }
522                                 }
523                         }
524                 }
525                 
526                 ifstream inFASTA;
527                 m->openInputFile(filename, inFASTA);
528                 inFASTA.seekg(line->start);
529                 
530                 ifstream qFile;
531                 if(qFileName != "")     {
532                         m->openInputFile(qFileName, qFile);
533                         qFile.seekg(qline->start);  
534                 }
535                 
536                 int count = 0;
537                 bool moreSeqs = 1;
538         
539                 while (moreSeqs) {
540                                 
541                         if (m->control_pressed) { 
542                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
543                                 if (oligoFile != "") {   outGroupsFile.close();   }
544
545                                 if(qFileName != ""){
546                                         qFile.close();
547                                 }
548                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
549
550                                 return 0;
551                         }
552                         
553                         int success = 1;
554                         string trashCode = "";
555                         int currentSeqsDiffs = 0;
556
557                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
558                         QualityScores currQual;
559                         if(qFileName != ""){
560                                 currQual = QualityScores(qFile);  m->gobble(qFile);
561                         }
562                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
563                         string origSeq = currSeq.getUnaligned();
564                         if (origSeq != "") {
565                                 
566                                 int barcodeIndex = 0;
567                                 int primerIndex = 0;
568                                 
569                                 if(barcodes.size() != 0){
570                                         success = stripBarcode(currSeq, currQual, barcodeIndex);
571                                         if(success > bdiffs)            {       trashCode += 'b';       }
572                                         else{ currentSeqsDiffs += success;  }
573                                 }
574                                 
575                                 if(numFPrimers != 0){
576                                         success = stripForward(currSeq, currQual, primerIndex);
577                                         if(success > pdiffs)            {       trashCode += 'f';       }
578                                         else{ currentSeqsDiffs += success;  }
579                                 }
580                                 
581                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
582                                 
583                                 if(numRPrimers != 0){
584                                         success = stripReverse(currSeq, currQual);
585                                         if(!success)                            {       trashCode += 'r';       }
586                                 }
587
588                                 if(keepFirst != 0){
589                                         success = keepFirstTrim(currSeq, currQual);
590                                 }
591                                 
592                                 if(removeLast != 0){
593                                         success = removeLastTrim(currSeq, currQual);
594                                         if(!success)                            {       trashCode += 'l';       }
595                                 }
596
597                                 
598                                 if(qFileName != ""){
599                                         int origLength = currSeq.getNumBases();
600                                         
601                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
602                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
603                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
604                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
605                                         else                                            {       success = 1;                            }
606                                 
607                                         //you don't want to trim, if it fails above then scrap it
608                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
609                                         
610                                         if(!success)                            {       trashCode += 'q';       }
611                                 }                               
612                 
613                                 if(minLength > 0 || maxLength > 0){
614                                         success = cullLength(currSeq);
615                                         if(!success)                            {       trashCode += 'l';       }
616                                 }
617                                 if(maxHomoP > 0){
618                                         success = cullHomoP(currSeq);
619                                         if(!success)                            {       trashCode += 'h';       }
620                                 }
621                                 if(maxAmbig != -1){
622                                         success = cullAmbigs(currSeq);
623                                         if(!success)                            {       trashCode += 'n';       }
624                                 }
625                                 
626                                 if(flip){               // should go last                       
627                                         currSeq.reverseComplement();
628                                         if(qFileName != ""){
629                                                 currQual.flipQScores(); 
630                                         }
631                                 }
632                                 
633                                 if(trashCode.length() == 0){
634                                         currSeq.setAligned(currSeq.getUnaligned());
635                                         currSeq.printSequence(trimFASTAFile);
636                                         
637                                         if(qFileName != ""){
638                                                 currQual.printQScores(trimQualFile);
639                                         }
640                                         
641                                         if(nameFile != ""){
642                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
643                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
644                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
645                                         }
646                                         
647                                         if(barcodes.size() != 0){
648                                                 string thisGroup = barcodeNameVector[barcodeIndex];
649                                                 if (primers.size() != 0) { if (primerNameVector[primerIndex] != "") { thisGroup += "." + primerNameVector[primerIndex]; } }
650                                                 
651                                                 outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
652                                                 
653                                                 if (nameFile != "") {
654                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
655                                                         if (itName != nameMap.end()) { 
656                                                                 vector<string> thisSeqsNames; 
657                                                                 m->splitAtChar(itName->second, thisSeqsNames, ',');
658                                                                 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
659                                                                         outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
660                                                                 }
661                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
662                                                 }
663                                                 
664                                                 map<string, int>::iterator it = groupCounts.find(thisGroup);
665                                                 if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
666                                                 else { groupCounts[it->first]++; }
667                                                         
668                                         }
669                                         
670                                         
671                                         if(allFiles){
672                                                 ofstream output;
673                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
674                                                 currSeq.printSequence(output);
675                                                 output.close();
676                                                 
677                                                 if(qFileName != ""){
678                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
679                                                         currQual.printQScores(output);
680                                                         output.close();                                                 
681                                                 }
682                                                 
683                                                 if(nameFile != ""){
684                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
685                                                         if (itName != nameMap.end()) { 
686                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
687                                                                 output << itName->first << '\t' << itName->second << endl; 
688                                                                 output.close();
689                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
690                                                 }
691                                         }
692                                 }
693                                 else{
694                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
695                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
696                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
697                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
698                                         }
699                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
700                                         currSeq.setUnaligned(origSeq);
701                                         currSeq.setAligned(origSeq);
702                                         currSeq.printSequence(scrapFASTAFile);
703                                         if(qFileName != ""){
704                                                 currQual.printQScores(scrapQualFile);
705                                         }
706                                 }
707                                 count++;
708                         }
709                         
710                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
711                                 unsigned long int pos = inFASTA.tellg();
712                                 if ((pos == -1) || (pos >= line->end)) { break; }
713                         
714                         #else
715                                 if (inFASTA.eof()) { break; }
716                         #endif
717                         
718                         //report progress
719                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
720                         
721                 }
722                 //report progress
723                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
724                 
725                 
726                 inFASTA.close();
727                 trimFASTAFile.close();
728                 scrapFASTAFile.close();
729                 if (oligoFile != "") {   outGroupsFile.close();   }
730                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
731                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
732                 
733                 return count;
734         }
735         catch(exception& e) {
736                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
737                 exit(1);
738         }
739 }
740
741 /**************************************************************************************************/
742
743 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
744         try {
745 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
746                 int process = 1;
747                 int exitCommand = 1;
748                 processIDS.clear();
749                 
750                 //loop through and create all the processes you want
751                 while (process != processors) {
752                         int pid = fork();
753                         
754                         if (pid > 0) {
755                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
756                                 process++;
757                         }else if (pid == 0){
758                                 
759                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
760                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
761                                 vector<vector<string> > tempNameFileNames = nameFileNames;
762
763                                 if(allFiles){
764                                         ofstream temp;
765
766                                         for(int i=0;i<tempFASTAFileNames.size();i++){
767                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
768                                                         if (tempFASTAFileNames[i][j] != "") {
769                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
770                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
771
772                                                                 if(qFileName != ""){
773                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
774                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
775                                                                 }
776                                                                 if(nameFile != ""){
777                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
778                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
779                                                                 }
780                                                         }
781                                                 }
782                                         }
783                                 }
784                                                         
785                                 driverCreateTrim(filename,
786                                                                  qFileName,
787                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
788                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
789                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
790                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
791                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
792                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
793                                                                  (groupFile + toString(getpid()) + ".temp"),
794                                                                  tempFASTAFileNames,
795                                                                  tempPrimerQualFileNames,
796                                                                  tempNameFileNames,
797                                                                  lines[process],
798                                                                  qLines[process]);
799                                 
800                                 //pass groupCounts to parent
801                                 if(oligoFile != ""){
802                                         ofstream out;
803                                         string tempFile = filename + toString(getpid()) + ".num.temp";
804                                         m->openOutputFile(tempFile, out);
805                                         
806                                         out << groupCounts.size() << endl;
807                                         
808                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
809                                                 out << it->first << '\t' << it->second << endl;
810                                         }
811                                         out.close();
812                                 }
813                                 exit(0);
814                         }else { 
815                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
816                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
817                                 exit(0);
818                         }
819                 }
820                 
821                 //parent do my part
822                 ofstream temp;
823                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
824                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
825                 if(qFileName != ""){
826                         m->openOutputFile(trimQualFileName, temp);              temp.close();
827                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
828                 }
829                 if (nameFile != "") {
830                         m->openOutputFile(trimNameFileName, temp);              temp.close();
831                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
832                 }
833
834                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
835                 
836                 //force parent to wait until all the processes are done
837                 for (int i=0;i<processIDS.size();i++) { 
838                         int temp = processIDS[i];
839                         wait(&temp);
840                 }
841                 
842                 //append files
843                 for(int i=0;i<processIDS.size();i++){
844                         
845                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
846                         
847                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
848                         remove((trimFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
849                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
850                         remove((scrapFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
851                         
852                         if(qFileName != ""){
853                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
854                                 remove((trimQualFileName + toString(processIDS[i]) + ".temp").c_str());
855                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
856                                 remove((scrapQualFileName + toString(processIDS[i]) + ".temp").c_str());
857                         }
858                         
859                         if(nameFile != ""){
860                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
861                                 remove((trimNameFileName + toString(processIDS[i]) + ".temp").c_str());
862                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
863                                 remove((scrapNameFileName + toString(processIDS[i]) + ".temp").c_str());
864                         }
865                         
866                         if(oligoFile != ""){
867                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
868                                 remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
869                         }
870                         
871                         
872                         if(allFiles){
873                                 for(int j=0;j<fastaFileNames.size();j++){
874                                         for(int k=0;k<fastaFileNames[j].size();k++){
875                                                 if (fastaFileNames[j][k] != "") {
876                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
877                                                         remove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
878                                                         
879                                                         if(qFileName != ""){
880                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
881                                                                 remove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
882                                                         }
883                                                         
884                                                         if(nameFile != ""){
885                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
886                                                                 remove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
887                                                         }
888                                                 }
889                                         }
890                                 }
891                         }
892                         
893                         if(oligoFile != ""){
894                                 ifstream in;
895                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
896                                 m->openInputFile(tempFile, in);
897                                 int tempNum;
898                                 string group;
899                                 
900                                 in >> tempNum; m->gobble(in);
901                                 
902                                 if (tempNum != 0) {
903                                         while (!in.eof()) { 
904                                                 in >> group >> tempNum; m->gobble(in);
905                                 
906                                                 map<string, int>::iterator it = groupCounts.find(group);
907                                                 if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
908                                                 else { groupCounts[it->first] += tempNum; }
909                                         }
910                                 }
911                                 in.close(); remove(tempFile.c_str());
912                         }
913                         
914                 }
915         
916                 return exitCommand;
917 #endif          
918         }
919         catch(exception& e) {
920                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
921                 exit(1);
922         }
923 }
924
925 /**************************************************************************************************/
926
927 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
928         try {
929                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
930                 //set file positions for fasta file
931                 fastaFilePos = m->divideFile(filename, processors);
932                 
933                 if (qfilename == "") { return processors; }
934                 
935                 //get name of first sequence in each chunk
936                 map<string, int> firstSeqNames;
937                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
938                         ifstream in;
939                         m->openInputFile(filename, in);
940                         in.seekg(fastaFilePos[i]);
941                 
942                         Sequence temp(in); 
943                         firstSeqNames[temp.getName()] = i;
944                 
945                         in.close();
946                 }
947                                 
948                 //seach for filePos of each first name in the qfile and save in qfileFilePos
949                 ifstream inQual;
950                 m->openInputFile(qfilename, inQual);
951                 
952                 string input;
953                 while(!inQual.eof()){   
954                         input = m->getline(inQual);
955
956                         if (input.length() != 0) {
957                                 if(input[0] == '>'){ //this is a sequence name line
958                                         istringstream nameStream(input);
959                                         
960                                         string sname = "";  nameStream >> sname;
961                                         sname = sname.substr(1);
962                                         
963                                         map<string, int>::iterator it = firstSeqNames.find(sname);
964                                         
965                                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
966                                                 unsigned long int pos = inQual.tellg(); 
967                                                 qfileFilePos.push_back(pos - input.length() - 1);       
968                                                 firstSeqNames.erase(it);
969                                         }
970                                 }
971                         }
972                         
973                         if (firstSeqNames.size() == 0) { break; }
974                 }
975                 inQual.close();
976                 
977                 
978                 if (firstSeqNames.size() != 0) { 
979                         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
980                                 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
981                         }
982                         qFileName = "";
983                         return processors;
984                 }
985
986                 //get last file position of qfile
987                 FILE * pFile;
988                 unsigned long int size;
989                 
990                 //get num bytes in file
991                 pFile = fopen (qfilename.c_str(),"rb");
992                 if (pFile==NULL) perror ("Error opening file");
993                 else{
994                         fseek (pFile, 0, SEEK_END);
995                         size=ftell (pFile);
996                         fclose (pFile);
997                 }
998                 
999                 qfileFilePos.push_back(size);
1000                 
1001                 return processors;
1002                 
1003                 #else
1004                 
1005                         fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1006                         //get last file position of fastafile
1007                         FILE * pFile;
1008                         unsigned long int size;
1009                         
1010                         //get num bytes in file
1011                         pFile = fopen (filename.c_str(),"rb");
1012                         if (pFile==NULL) perror ("Error opening file");
1013                         else{
1014                                 fseek (pFile, 0, SEEK_END);
1015                                 size=ftell (pFile);
1016                                 fclose (pFile);
1017                         }
1018                         fastaFilePos.push_back(size);
1019                         
1020                         //get last file position of fastafile
1021                         FILE * qFile;
1022                         
1023                         //get num bytes in file
1024                         qFile = fopen (qfilename.c_str(),"rb");
1025                         if (qFile==NULL) perror ("Error opening file");
1026                         else{
1027                                 fseek (qFile, 0, SEEK_END);
1028                                 size=ftell (qFile);
1029                                 fclose (qFile);
1030                         }
1031                         qfileFilePos.push_back(size);
1032                 
1033                         return 1;
1034                 
1035                 #endif
1036         }
1037         catch(exception& e) {
1038                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1039                 exit(1);
1040         }
1041 }
1042
1043 //***************************************************************************************************************
1044
1045 void TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1046         try {
1047                 ifstream inOligos;
1048                 m->openInputFile(oligoFile, inOligos);
1049                 
1050                 ofstream test;
1051                 
1052                 string type, oligo, group;
1053
1054                 int indexPrimer = 0;
1055                 int indexBarcode = 0;
1056                 
1057                 while(!inOligos.eof()){
1058
1059                         inOligos >> type; 
1060                                         
1061                         if(type[0] == '#'){
1062                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1063                                 m->gobble(inOligos);
1064                         }
1065                         else{
1066                                 m->gobble(inOligos);
1067                                 //make type case insensitive
1068                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1069                                 
1070                                 inOligos >> oligo;
1071                                 
1072                                 for(int i=0;i<oligo.length();i++){
1073                                         oligo[i] = toupper(oligo[i]);
1074                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1075                                 }
1076                                 
1077                                 if(type == "FORWARD"){
1078                                         group = "";
1079                                         
1080                                         // get rest of line in case there is a primer name
1081                                         while (!inOligos.eof()) {       
1082                                                 char c = inOligos.get(); 
1083                                                 if (c == 10 || c == 13){        break;  }
1084                                                 else if (c == 32 || c == 9){;} //space or tab
1085                                                 else {  group += c;  }
1086                                         } 
1087                                         
1088                                         //check for repeat barcodes
1089                                         map<string, int>::iterator itPrime = primers.find(oligo);
1090                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1091                                         
1092                                         primers[oligo]=indexPrimer; indexPrimer++;              
1093                                         primerNameVector.push_back(group);
1094                                 }
1095                                 else if(type == "REVERSE"){
1096                                         Sequence oligoRC("reverse", oligo);
1097                                         oligoRC.reverseComplement();
1098                                         revPrimer.push_back(oligoRC.getUnaligned());
1099                                 }
1100                                 else if(type == "BARCODE"){
1101                                         inOligos >> group;
1102                                         
1103                                         //check for repeat barcodes
1104                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1105                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1106                                                 
1107                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1108                                         barcodeNameVector.push_back(group);
1109                                 }
1110                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
1111                         }
1112                         m->gobble(inOligos);
1113                 }       
1114                 inOligos.close();
1115                 
1116                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1117                 
1118                 //add in potential combos
1119                 if(barcodeNameVector.size() == 0){
1120                         barcodes[""] = 0;
1121                         barcodeNameVector.push_back("");                        
1122                 }
1123                 
1124                 if(primerNameVector.size() == 0){
1125                         primers[""] = 0;
1126                         primerNameVector.push_back("");                 
1127                 }
1128                 
1129                 fastaFileNames.resize(barcodeNameVector.size());
1130                 for(int i=0;i<fastaFileNames.size();i++){
1131                         fastaFileNames[i].assign(primerNameVector.size(), "");
1132                 }
1133                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1134                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1135                 
1136                 if(allFiles){
1137                         set<string> uniqueNames; //used to cleanup outputFileNames
1138                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1139                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1140                                         
1141                                         string primerName = primerNameVector[itPrimer->second];
1142                                         string barcodeName = barcodeNameVector[itBar->second];
1143                                         
1144                                         string comboGroupName = "";
1145                                         string fastaFileName = "";
1146                                         string qualFileName = "";
1147                                         string nameFileName = "";
1148                                         
1149                                         if(primerName == ""){
1150                                                 comboGroupName = barcodeNameVector[itBar->second];
1151                                         }
1152                                         else{
1153                                                 if(barcodeName == ""){
1154                                                         comboGroupName = primerNameVector[itPrimer->second];
1155                                                 }
1156                                                 else{
1157                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1158                                                 }
1159                                         }
1160                                         
1161                                         
1162                                         ofstream temp;
1163                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1164                                         if (uniqueNames.count(fastaFileName) == 0) {
1165                                                 outputNames.push_back(fastaFileName);
1166                                                 outputTypes["fasta"].push_back(fastaFileName);
1167                                                 uniqueNames.insert(fastaFileName);
1168                                         }
1169                                         
1170                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1171                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1172                                         
1173                                         if(qFileName != ""){
1174                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1175                                                 if (uniqueNames.count(qualFileName) == 0) {
1176                                                         outputNames.push_back(qualFileName);
1177                                                         outputTypes["qfile"].push_back(qualFileName);
1178                                                 }
1179                                                 
1180                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1181                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1182                                         }
1183                                         
1184                                         if(nameFile != ""){
1185                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1186                                                 if (uniqueNames.count(nameFileName) == 0) {
1187                                                         outputNames.push_back(nameFileName);
1188                                                         outputTypes["name"].push_back(nameFileName);
1189                                                 }
1190                                                 
1191                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1192                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1193                                         }
1194                                         
1195                                 }
1196                         }
1197                 }
1198                 numFPrimers = primers.size();
1199                 numRPrimers = revPrimer.size();
1200
1201         }
1202         catch(exception& e) {
1203                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1204                 exit(1);
1205         }
1206 }
1207
1208 //***************************************************************************************************************
1209
1210 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
1211         try {
1212                 
1213                 string rawSequence = seq.getUnaligned();
1214                 int success = bdiffs + 1;       //guilty until proven innocent
1215                 
1216                 //can you find the barcode
1217                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1218                         string oligo = it->first;
1219                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
1220                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1221                                 break;  
1222                         }
1223                         
1224                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1225                                 group = it->second;
1226                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1227                                 
1228                                 if(qual.getName() != ""){
1229                                         qual.trimQScores(oligo.length(), -1);
1230                                 }
1231                                 
1232                                 success = 0;
1233                                 break;
1234                         }
1235                 }
1236                 
1237                 //if you found the barcode or if you don't want to allow for diffs
1238                 if ((bdiffs == 0) || (success == 0)) { return success;  }
1239                 
1240                 else { //try aligning and see if you can find it
1241
1242                         int maxLength = 0;
1243
1244                         Alignment* alignment;
1245                         if (barcodes.size() > 0) {
1246                                 map<string,int>::iterator it=barcodes.begin();
1247
1248                                 for(it;it!=barcodes.end();it++){
1249                                         if(it->first.length() > maxLength){
1250                                                 maxLength = it->first.length();
1251                                         }
1252                                 }
1253                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
1254
1255                         }else{ alignment = NULL; } 
1256                         
1257                         //can you find the barcode
1258                         int minDiff = 1e6;
1259                         int minCount = 1;
1260                         int minGroup = -1;
1261                         int minPos = 0;
1262                         
1263                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1264                                 string oligo = it->first;
1265 //                              int length = oligo.length();
1266                                 
1267                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
1268                                         success = bdiffs + 10;
1269                                         break;
1270                                 }
1271                                 
1272                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1273                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1274                                 oligo = alignment->getSeqAAln();
1275                                 string temp = alignment->getSeqBAln();
1276                 
1277                                 int alnLength = oligo.length();
1278                                 
1279                                 for(int i=oligo.length()-1;i>=0;i--){
1280                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1281                                 }
1282                                 oligo = oligo.substr(0,alnLength);
1283                                 temp = temp.substr(0,alnLength);
1284                                 
1285                                 int numDiff = countDiffs(oligo, temp);
1286                                 
1287                                 if(numDiff < minDiff){
1288                                         minDiff = numDiff;
1289                                         minCount = 1;
1290                                         minGroup = it->second;
1291                                         minPos = 0;
1292                                         for(int i=0;i<alnLength;i++){
1293                                                 if(temp[i] != '-'){
1294                                                         minPos++;
1295                                                 }
1296                                         }
1297                                 }
1298                                 else if(numDiff == minDiff){
1299                                         minCount++;
1300                                 }
1301
1302                         }
1303
1304                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
1305                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1306                         else{                                                                                                   //use the best match
1307                                 group = minGroup;
1308                                 seq.setUnaligned(rawSequence.substr(minPos));
1309                                 
1310                                 if(qual.getName() != ""){
1311                                         qual.trimQScores(minPos, -1);
1312                                 }
1313                                 success = minDiff;
1314                         }
1315                         
1316                         if (alignment != NULL) {  delete alignment;  }
1317                         
1318                 }
1319         
1320                 return success;
1321                 
1322         }
1323         catch(exception& e) {
1324                 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
1325                 exit(1);
1326         }
1327
1328 }
1329
1330 //***************************************************************************************************************
1331
1332 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
1333         try {
1334                 string rawSequence = seq.getUnaligned();
1335                 int success = pdiffs + 1;       //guilty until proven innocent
1336                 
1337                 //can you find the primer
1338                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1339                         string oligo = it->first;
1340                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
1341                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1342                                 break;  
1343                         }
1344                         
1345                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1346                                 group = it->second;
1347                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1348                                 if(qual.getName() != ""){
1349                                         qual.trimQScores(oligo.length(), -1);
1350                                 }
1351                                 success = 0;
1352                                 break;
1353                         }
1354                 }
1355
1356                 //if you found the barcode or if you don't want to allow for diffs
1357                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1358                 
1359                 else { //try aligning and see if you can find it
1360
1361                         int maxLength = 0;
1362
1363                         Alignment* alignment;
1364                         if (primers.size() > 0) {
1365                                 map<string,int>::iterator it=primers.begin();
1366
1367                                 for(it;it!=primers.end();it++){
1368                                         if(it->first.length() > maxLength){
1369                                                 maxLength = it->first.length();
1370                                         }
1371                                 }
1372                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
1373
1374                         }else{ alignment = NULL; } 
1375                         
1376                         //can you find the barcode
1377                         int minDiff = 1e6;
1378                         int minCount = 1;
1379                         int minGroup = -1;
1380                         int minPos = 0;
1381                         
1382                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1383                                 string oligo = it->first;
1384 //                              int length = oligo.length();
1385                                 
1386                                 if(rawSequence.length() < maxLength){   
1387                                         success = pdiffs + 100;
1388                                         break;
1389                                 }
1390                                 
1391                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1392                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1393                                 oligo = alignment->getSeqAAln();
1394                                 string temp = alignment->getSeqBAln();
1395                 
1396                                 int alnLength = oligo.length();
1397                                 
1398                                 for(int i=oligo.length()-1;i>=0;i--){
1399                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1400                                 }
1401                                 oligo = oligo.substr(0,alnLength);
1402                                 temp = temp.substr(0,alnLength);
1403                                 
1404                                 int numDiff = countDiffs(oligo, temp);
1405                                 
1406                                 if(numDiff < minDiff){
1407                                         minDiff = numDiff;
1408                                         minCount = 1;
1409                                         minGroup = it->second;
1410                                         minPos = 0;
1411                                         for(int i=0;i<alnLength;i++){
1412                                                 if(temp[i] != '-'){
1413                                                         minPos++;
1414                                                 }
1415                                         }
1416                                 }
1417                                 else if(numDiff == minDiff){
1418                                         minCount++;
1419                                 }
1420
1421                         }
1422
1423                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1424                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1425                         else{                                                                                                   //use the best match
1426                                 group = minGroup;
1427                                 seq.setUnaligned(rawSequence.substr(minPos));
1428                                 if(qual.getName() != ""){
1429                                         qual.trimQScores(minPos, -1);
1430                                 }
1431                                 success = minDiff;
1432                         }
1433                         
1434                         if (alignment != NULL) {  delete alignment;  }
1435                         
1436                 }
1437                 
1438                 return success;
1439
1440         }
1441         catch(exception& e) {
1442                 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1443                 exit(1);
1444         }
1445 }
1446
1447 //***************************************************************************************************************
1448
1449 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1450         try {
1451                 string rawSequence = seq.getUnaligned();
1452                 bool success = 0;       //guilty until proven innocent
1453                 
1454                 for(int i=0;i<numRPrimers;i++){
1455                         string oligo = revPrimer[i];
1456                         
1457                         if(rawSequence.length() < oligo.length()){
1458                                 success = 0;
1459                                 break;
1460                         }
1461                         
1462                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1463                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1464                                 if(qual.getName() != ""){
1465                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
1466                                 }
1467                                 success = 1;
1468                                 break;
1469                         }
1470                 }       
1471                 return success;
1472                 
1473         }
1474         catch(exception& e) {
1475                 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1476                 exit(1);
1477         }
1478 }
1479
1480 //***************************************************************************************************************
1481
1482 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1483         try {
1484                 bool success = 1;
1485                 if(qscores.getName() != ""){
1486                         qscores.trimQScores(-1, keepFirst);
1487                 }
1488                 sequence.trim(keepFirst);
1489                 return success;
1490         }
1491         catch(exception& e) {
1492                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1493                 exit(1);
1494         }
1495         
1496 }       
1497
1498 //***************************************************************************************************************
1499
1500 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1501         try {
1502                 bool success = 0;
1503                 
1504                 int length = sequence.getNumBases() - removeLast;
1505                 
1506                 if(length > 0){
1507                         if(qscores.getName() != ""){
1508                                 qscores.trimQScores(-1, length);
1509                         }
1510                         sequence.trim(length);
1511                         success = 1;
1512                 }
1513                 else{
1514                         success = 0;
1515                 }
1516
1517                 return success;
1518         }
1519         catch(exception& e) {
1520                 m->errorOut(e, "removeLastTrim", "countDiffs");
1521                 exit(1);
1522         }
1523         
1524 }       
1525
1526 //***************************************************************************************************************
1527
1528 bool TrimSeqsCommand::cullLength(Sequence& seq){
1529         try {
1530         
1531                 int length = seq.getNumBases();
1532                 bool success = 0;       //guilty until proven innocent
1533                 
1534                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1535                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1536                 else                                                                                            {       success = 0;    }
1537                 
1538                 return success;
1539         
1540         }
1541         catch(exception& e) {
1542                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1543                 exit(1);
1544         }
1545         
1546 }
1547
1548 //***************************************************************************************************************
1549
1550 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1551         try {
1552                 int longHomoP = seq.getLongHomoPolymer();
1553                 bool success = 0;       //guilty until proven innocent
1554                 
1555                 if(longHomoP <= maxHomoP){      success = 1;    }
1556                 else                                    {       success = 0;    }
1557                 
1558                 return success;
1559         }
1560         catch(exception& e) {
1561                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1562                 exit(1);
1563         }
1564         
1565 }
1566
1567 //***************************************************************************************************************
1568
1569 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1570         try {
1571                 int numNs = seq.getAmbigBases();
1572                 bool success = 0;       //guilty until proven innocent
1573                 
1574                 if(numNs <= maxAmbig)   {       success = 1;    }
1575                 else                                    {       success = 0;    }
1576                 
1577                 return success;
1578         }
1579         catch(exception& e) {
1580                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1581                 exit(1);
1582         }
1583         
1584 }
1585
1586 //***************************************************************************************************************
1587
1588 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1589         try {
1590                 bool success = 1;
1591                 int length = oligo.length();
1592                 
1593                 for(int i=0;i<length;i++){
1594                         
1595                         if(oligo[i] != seq[i]){
1596                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
1597                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
1598                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
1599                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
1600                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
1601                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
1602                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
1603                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
1604                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1605                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1606                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
1607                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
1608                                 
1609                                 if(success == 0)        {       break;   }
1610                         }
1611                         else{
1612                                 success = 1;
1613                         }
1614                 }
1615                 
1616                 return success;
1617         }
1618         catch(exception& e) {
1619                 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1620                 exit(1);
1621         }
1622
1623 }
1624
1625 //***************************************************************************************************************
1626
1627 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1628         try {
1629
1630                 int length = oligo.length();
1631                 int countDiffs = 0;
1632                 
1633                 for(int i=0;i<length;i++){
1634                                                                 
1635                         if(oligo[i] != seq[i]){
1636                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
1637                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
1638                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
1639                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
1640                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
1641                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
1642                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
1643                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
1644                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1645                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1646                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
1647                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
1648                         }
1649                         
1650                 }
1651                 
1652                 return countDiffs;
1653         }
1654         catch(exception& e) {
1655                 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1656                 exit(1);
1657         }
1658
1659 }
1660
1661 //***************************************************************************************************************