]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
small bug with trimming of quality scores over a window. lost last base if sequence...
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12
13 //**********************************************************************************************************************
14 vector<string> TrimSeqsCommand::setParameters(){        
15         try {
16                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
17                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
18                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
19                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
20                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
21                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
22                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
23                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
24                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
25                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
26                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
27                 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
28                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
29                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
30                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
31                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
32                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
33                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
34                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
35                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
36                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
37                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
38                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
39                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
40                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
41                         
42                 vector<string> myArray;
43                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
44                 return myArray;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 string TrimSeqsCommand::getHelpString(){        
53         try {
54                 string helpString = "";
55                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
56                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
57                 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
58                 helpString += "The fasta parameter is required.\n";
59                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
60                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
61                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
62                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
63                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
64                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
65                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
66                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n";
67                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
68                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
69                 helpString += "The qfile parameter allows you to provide a quality file.\n";
70                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
71                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
72                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
73                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
74                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
75                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
76                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
77                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
78                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
79                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
80                 helpString += "The trim.seqs command should be in the following format: \n";
81                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
82                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
83                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
84                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
85                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
86                 return helpString;
87         }
88         catch(exception& e) {
89                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
90                 exit(1);
91         }
92 }
93
94
95 //**********************************************************************************************************************
96
97 TrimSeqsCommand::TrimSeqsCommand(){     
98         try {
99                 abort = true; calledHelp = true; 
100                 setParameters();
101                 vector<string> tempOutNames;
102                 outputTypes["fasta"] = tempOutNames;
103                 outputTypes["qfile"] = tempOutNames;
104                 outputTypes["group"] = tempOutNames;
105                 outputTypes["name"] = tempOutNames;
106         }
107         catch(exception& e) {
108                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
109                 exit(1);
110         }
111 }
112 //***************************************************************************************************************
113
114 TrimSeqsCommand::TrimSeqsCommand(string option)  {
115         try {
116                 
117                 abort = false; calledHelp = false;   
118                 comboStarts = 0;
119                 
120                 //allow user to run help
121                 if(option == "help") { help(); abort = true; calledHelp = true; }
122                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
123                 
124                 else {
125                         vector<string> myArray = setParameters();
126                         
127                         OptionParser parser(option);
128                         map<string,string> parameters = parser.getParameters();
129                         
130                         ValidParameters validParameter;
131                         map<string,string>::iterator it;
132                         
133                         //check to make sure all parameters are valid for command
134                         for (it = parameters.begin(); it != parameters.end(); it++) { 
135                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
136                         }
137                         
138                         //initialize outputTypes
139                         vector<string> tempOutNames;
140                         outputTypes["fasta"] = tempOutNames;
141                         outputTypes["qfile"] = tempOutNames;
142                         outputTypes["group"] = tempOutNames;
143                         outputTypes["name"] = tempOutNames;
144                         
145                         //if the user changes the input directory command factory will send this info to us in the output parameter 
146                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
147                         if (inputDir == "not found"){   inputDir = "";          }
148                         else {
149                                 string path;
150                                 it = parameters.find("fasta");
151                                 //user has given a template file
152                                 if(it != parameters.end()){ 
153                                         path = m->hasPath(it->second);
154                                         //if the user has not given a path then, add inputdir. else leave path alone.
155                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
156                                 }
157                                 
158                                 it = parameters.find("oligos");
159                                 //user has given a template file
160                                 if(it != parameters.end()){ 
161                                         path = m->hasPath(it->second);
162                                         //if the user has not given a path then, add inputdir. else leave path alone.
163                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
164                                 }
165                                 
166                                 it = parameters.find("qfile");
167                                 //user has given a template file
168                                 if(it != parameters.end()){ 
169                                         path = m->hasPath(it->second);
170                                         //if the user has not given a path then, add inputdir. else leave path alone.
171                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
172                                 }
173                                 
174                                 it = parameters.find("name");
175                                 //user has given a template file
176                                 if(it != parameters.end()){ 
177                                         path = m->hasPath(it->second);
178                                         //if the user has not given a path then, add inputdir. else leave path alone.
179                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
180                                 }
181                                 
182                         }
183
184                         
185                         //check for required parameters
186                         fastaFile = validParameter.validFile(parameters, "fasta", true);
187                         if (fastaFile == "not found") {                                 
188                                 fastaFile = m->getFastaFile(); 
189                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
190                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
191                         }else if (fastaFile == "not open") { abort = true; }    
192                         else { m->setFastaFile(fastaFile); }
193                         
194                         //if the user changes the output directory command factory will send this info to us in the output parameter 
195                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
196                                 outputDir = ""; 
197                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
198                         }
199                 
200                         
201                         //check for optional parameter and set defaults
202                         // ...at some point should added some additional type checking...
203                         string temp;
204                         temp = validParameter.validFile(parameters, "flip", false);
205                         if (temp == "not found"){       flip = 0;       }
206                         else if(m->isTrue(temp))        {       flip = 1;       }
207                 
208                         temp = validParameter.validFile(parameters, "oligos", true);
209                         if (temp == "not found"){       oligoFile = "";         }
210                         else if(temp == "not open"){    abort = true;   } 
211                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
212                         
213                         
214                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
215                         convert(temp, maxAmbig);  
216
217                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
218                         convert(temp, maxHomoP);  
219
220                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
221                         convert(temp, minLength); 
222                         
223                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
224                         convert(temp, maxLength);
225                         
226                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
227                         convert(temp, bdiffs);
228                         
229                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
230                         convert(temp, pdiffs);
231                         
232                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
233                         convert(temp, tdiffs);
234                         
235                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
236                         
237                         temp = validParameter.validFile(parameters, "qfile", true);     
238                         if (temp == "not found")        {       qFileName = "";         }
239                         else if(temp == "not open")     {       abort = true;           }
240                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
241                         
242                         temp = validParameter.validFile(parameters, "name", true);      
243                         if (temp == "not found")        {       nameFile = "";          }
244                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
245                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
246                         
247                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
248                         convert(temp, qThreshold);
249                         
250                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
251                         qtrim = m->isTrue(temp);
252
253                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
254                         convert(temp, qRollAverage);
255
256                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
257                         convert(temp, qWindowAverage);
258
259                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
260                         convert(temp, qWindowSize);
261
262                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
263                         convert(temp, qWindowStep);
264
265                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
266                         convert(temp, qAverage);
267
268                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
269                         convert(temp, keepFirst);
270
271                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
272                         convert(temp, removeLast);
273                         
274                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
275                         allFiles = m->isTrue(temp);
276                         
277                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
278                         m->setProcessors(temp);
279                         convert(temp, processors); 
280                         
281                         
282                         if(allFiles && (oligoFile == "")){
283                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
284                         }
285                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
286                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
287                                 qAverage=0;
288                                 qThreshold=0;
289                         }
290                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
291                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
292                                 abort = true;
293                         }
294                 }
295
296         }
297         catch(exception& e) {
298                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
299                 exit(1);
300         }
301 }
302 //***************************************************************************************************************
303
304 int TrimSeqsCommand::execute(){
305         try{
306         
307                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
308                 
309                 numFPrimers = 0;  //this needs to be initialized
310                 numRPrimers = 0;
311                 vector<vector<string> > fastaFileNames;
312                 vector<vector<string> > qualFileNames;
313                 vector<vector<string> > nameFileNames;
314                 
315                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
316                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
317                 
318                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
319                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
320                 
321                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
322                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
323                 
324                 if (qFileName != "") {
325                         outputNames.push_back(trimQualFile);
326                         outputNames.push_back(scrapQualFile);
327                         outputTypes["qfile"].push_back(trimQualFile);
328                         outputTypes["qfile"].push_back(scrapQualFile); 
329                 }
330                 
331                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
332                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
333                 
334                 if (nameFile != "") {
335                         m->readNames(nameFile, nameMap);
336                         outputNames.push_back(trimNameFile);
337                         outputNames.push_back(scrapNameFile);
338                         outputTypes["name"].push_back(trimNameFile);
339                         outputTypes["name"].push_back(scrapNameFile); 
340                 }
341                 
342                 if (m->control_pressed) { return 0; }
343                 
344                 string outputGroupFileName;
345                 if(oligoFile != ""){
346                         outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
347                         outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
348                         getOligos(fastaFileNames, qualFileNames, nameFileNames);
349                 }
350                 
351                 vector<unsigned long int> fastaFilePos;
352                 vector<unsigned long int> qFilePos;
353                 
354                 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
355                 
356                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
357                         lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
358                         if (qFileName != "") {  qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)]));  }
359                 }       
360                 if(qFileName == "")     {       qLines = lines; } //files with duds
361                 
362                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
363                                 if(processors == 1){
364                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
365                                 }else{
366                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
367                                 }       
368                 #else
369                                 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
370                 #endif
371                 
372                 if (m->control_pressed) {  return 0; }                  
373         
374                 if(allFiles){
375                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
376                         map<string, string>::iterator it;
377                         set<string> namesToRemove;
378                         for(int i=0;i<fastaFileNames.size();i++){
379                                 for(int j=0;j<fastaFileNames[0].size();j++){
380                                         if (fastaFileNames[i][j] != "") {
381                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
382                                                         if(m->isBlank(fastaFileNames[i][j])){
383                                                                 remove(fastaFileNames[i][j].c_str());
384                                                                 namesToRemove.insert(fastaFileNames[i][j]);
385                                                         
386                                                                 if(qFileName != ""){
387                                                                         remove(qualFileNames[i][j].c_str());
388                                                                         namesToRemove.insert(qualFileNames[i][j]);
389                                                                 }
390                                                                 
391                                                                 if(nameFile != ""){
392                                                                         remove(nameFileNames[i][j].c_str());
393                                                                         namesToRemove.insert(nameFileNames[i][j]);
394                                                                 }
395                                                         }else{  
396                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
397                                                                 if (it == uniqueFastaNames.end()) {     
398                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
399                                                                 }       
400                                                         }
401                                                 }
402                                         }
403                                 }
404                         }
405                         
406                         //remove names for outputFileNames, just cleans up the output
407                         vector<string> outputNames2;
408                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
409                         outputNames = outputNames2;
410                         
411                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
412                                 ifstream in;
413                                 m->openInputFile(it->first, in);
414                                 
415                                 ofstream out;
416                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
417                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
418                                 m->openOutputFile(thisGroupName, out);
419                                 
420                                 while (!in.eof()){
421                                         if (m->control_pressed) { break; }
422                                         
423                                         Sequence currSeq(in); m->gobble(in);
424                                         out << currSeq.getName() << '\t' << it->second << endl;
425                                 }
426                                 in.close();
427                                 out.close();
428                         }
429                 }
430                 
431                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0;     }
432
433                 //output group counts
434                 m->mothurOutEndLine();
435                 int total = 0;
436                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
437                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
438                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
439                 }
440                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
441                 
442                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0;     }
443
444                 //set fasta file as new current fastafile
445                 string current = "";
446                 itTypes = outputTypes.find("fasta");
447                 if (itTypes != outputTypes.end()) {
448                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
449                 }
450                 
451                 itTypes = outputTypes.find("name");
452                 if (itTypes != outputTypes.end()) {
453                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
454                 }
455                 
456                 itTypes = outputTypes.find("qfile");
457                 if (itTypes != outputTypes.end()) {
458                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
459                 }
460                 
461                 itTypes = outputTypes.find("group");
462                 if (itTypes != outputTypes.end()) {
463                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
464                 }
465
466                 m->mothurOutEndLine();
467                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
468                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
469                 m->mothurOutEndLine();
470                 
471                 return 0;       
472                         
473         }
474         catch(exception& e) {
475                 m->errorOut(e, "TrimSeqsCommand", "execute");
476                 exit(1);
477         }
478 }
479                 
480 /**************************************************************************************/
481
482 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair* line, linePair* qline) {    
483                 
484         try {
485                 
486                 ofstream trimFASTAFile;
487                 m->openOutputFile(trimFileName, trimFASTAFile);
488                 
489                 ofstream scrapFASTAFile;
490                 m->openOutputFile(scrapFileName, scrapFASTAFile);
491                 
492                 ofstream trimQualFile;
493                 ofstream scrapQualFile;
494                 if(qFileName != ""){
495                         m->openOutputFile(trimQFileName, trimQualFile);
496                         m->openOutputFile(scrapQFileName, scrapQualFile);
497                 }
498                 
499                 ofstream trimNameFile;
500                 ofstream scrapNameFile;
501                 if(nameFile != ""){
502                         m->openOutputFile(trimNFileName, trimNameFile);
503                         m->openOutputFile(scrapNFileName, scrapNameFile);
504                 }
505                 
506                 
507                 ofstream outGroupsFile;
508                 if (oligoFile != ""){   m->openOutputFile(groupFileName, outGroupsFile);   }
509                 if(allFiles){
510                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
511                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
512                                         if (fastaFileNames[i][j] != "") {
513                                                 ofstream temp;
514                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
515                                                 if(qFileName != ""){
516                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
517                                                 }
518                                                 
519                                                 if(nameFile != ""){
520                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
521                                                 }
522                                         }
523                                 }
524                         }
525                 }
526                 
527                 ifstream inFASTA;
528                 m->openInputFile(filename, inFASTA);
529                 inFASTA.seekg(line->start);
530                 
531                 ifstream qFile;
532                 if(qFileName != "")     {
533                         m->openInputFile(qFileName, qFile);
534                         qFile.seekg(qline->start);  
535                 }
536                 
537                 int count = 0;
538                 bool moreSeqs = 1;
539         
540                 while (moreSeqs) {
541                                 
542                         if (m->control_pressed) { 
543                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
544                                 if (oligoFile != "") {   outGroupsFile.close();   }
545
546                                 if(qFileName != ""){
547                                         qFile.close();
548                                 }
549                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
550
551                                 return 0;
552                         }
553                         
554                         int success = 1;
555                         string trashCode = "";
556                         int currentSeqsDiffs = 0;
557
558                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
559                         QualityScores currQual;
560                         if(qFileName != ""){
561                                 currQual = QualityScores(qFile);  m->gobble(qFile);
562                         }
563                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
564                         string origSeq = currSeq.getUnaligned();
565                         if (origSeq != "") {
566                                 
567                                 int barcodeIndex = 0;
568                                 int primerIndex = 0;
569                                 
570                                 if(barcodes.size() != 0){
571                                         success = stripBarcode(currSeq, currQual, barcodeIndex);
572                                         if(success > bdiffs)            {       trashCode += 'b';       }
573                                         else{ currentSeqsDiffs += success;  }
574                                 }
575                                 
576                                 if(numFPrimers != 0){
577                                         success = stripForward(currSeq, currQual, primerIndex);
578                                         if(success > pdiffs)            {       trashCode += 'f';       }
579                                         else{ currentSeqsDiffs += success;  }
580                                 }
581                                 
582                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
583                                 
584                                 if(numRPrimers != 0){
585                                         success = stripReverse(currSeq, currQual);
586                                         if(!success)                            {       trashCode += 'r';       }
587                                 }
588
589                                 if(keepFirst != 0){
590                                         success = keepFirstTrim(currSeq, currQual);
591                                 }
592                                 
593                                 if(removeLast != 0){
594                                         success = removeLastTrim(currSeq, currQual);
595                                         if(!success)                            {       trashCode += 'l';       }
596                                 }
597
598                                 
599                                 if(qFileName != ""){
600                                         int origLength = currSeq.getNumBases();
601                                         
602                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
603                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
604                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
605                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
606                                         else                                            {       success = 1;                            }
607                                         
608                                         //you don't want to trim, if it fails above then scrap it
609                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
610                                         
611                                         if(!success)                            {       trashCode += 'q';       }
612                                 }                               
613                 
614                                 if(minLength > 0 || maxLength > 0){
615                                         success = cullLength(currSeq);
616                                         if(!success)                            {       trashCode += 'l';       }
617                                 }
618                                 if(maxHomoP > 0){
619                                         success = cullHomoP(currSeq);
620                                         if(!success)                            {       trashCode += 'h';       }
621                                 }
622                                 if(maxAmbig != -1){
623                                         success = cullAmbigs(currSeq);
624                                         if(!success)                            {       trashCode += 'n';       }
625                                 }
626                                 
627                                 if(flip){               // should go last                       
628                                         currSeq.reverseComplement();
629                                         if(qFileName != ""){
630                                                 currQual.flipQScores(); 
631                                         }
632                                 }
633                                 
634                                 if(trashCode.length() == 0){
635                                         currSeq.setAligned(currSeq.getUnaligned());
636                                         currSeq.printSequence(trimFASTAFile);
637                                         
638                                         if(qFileName != ""){
639                                                 currQual.printQScores(trimQualFile);
640                                         }
641                                         
642                                         if(nameFile != ""){
643                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
644                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
645                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
646                                         }
647                                         
648                                         if(barcodes.size() != 0){
649                                                 string thisGroup = barcodeNameVector[barcodeIndex];
650                                                 if (primers.size() != 0) { if (primerNameVector[primerIndex] != "") { thisGroup += "." + primerNameVector[primerIndex]; } }
651                                                 
652                                                 outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
653                                                 
654                                                 if (nameFile != "") {
655                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
656                                                         if (itName != nameMap.end()) { 
657                                                                 vector<string> thisSeqsNames; 
658                                                                 m->splitAtChar(itName->second, thisSeqsNames, ',');
659                                                                 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
660                                                                         outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
661                                                                 }
662                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
663                                                 }
664                                                 
665                                                 map<string, int>::iterator it = groupCounts.find(thisGroup);
666                                                 if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
667                                                 else { groupCounts[it->first]++; }
668                                                         
669                                         }
670                                         
671                                         
672                                         if(allFiles){
673                                                 ofstream output;
674                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
675                                                 currSeq.printSequence(output);
676                                                 output.close();
677                                                 
678                                                 if(qFileName != ""){
679                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
680                                                         currQual.printQScores(output);
681                                                         output.close();                                                 
682                                                 }
683                                                 
684                                                 if(nameFile != ""){
685                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
686                                                         if (itName != nameMap.end()) { 
687                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
688                                                                 output << itName->first << '\t' << itName->second << endl; 
689                                                                 output.close();
690                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
691                                                 }
692                                         }
693                                 }
694                                 else{
695                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
696                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
697                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
698                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
699                                         }
700                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
701                                         currSeq.setUnaligned(origSeq);
702                                         currSeq.setAligned(origSeq);
703                                         currSeq.printSequence(scrapFASTAFile);
704                                         if(qFileName != ""){
705                                                 currQual.printQScores(scrapQualFile);
706                                         }
707                                 }
708                                 count++;
709                         }
710                         
711                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
712                                 unsigned long int pos = inFASTA.tellg();
713                                 if ((pos == -1) || (pos >= line->end)) { break; }
714                         
715                         #else
716                                 if (inFASTA.eof()) { break; }
717                         #endif
718                         
719                         //report progress
720                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
721                         
722                 }
723                 //report progress
724                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
725                 
726                 
727                 inFASTA.close();
728                 trimFASTAFile.close();
729                 scrapFASTAFile.close();
730                 if (oligoFile != "") {   outGroupsFile.close();   }
731                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
732                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
733                 
734                 return count;
735         }
736         catch(exception& e) {
737                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
738                 exit(1);
739         }
740 }
741
742 /**************************************************************************************************/
743
744 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
745         try {
746 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
747                 int process = 1;
748                 int exitCommand = 1;
749                 processIDS.clear();
750                 
751                 //loop through and create all the processes you want
752                 while (process != processors) {
753                         int pid = fork();
754                         
755                         if (pid > 0) {
756                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
757                                 process++;
758                         }else if (pid == 0){
759                                 
760                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
761                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
762                                 vector<vector<string> > tempNameFileNames = nameFileNames;
763
764                                 if(allFiles){
765                                         ofstream temp;
766
767                                         for(int i=0;i<tempFASTAFileNames.size();i++){
768                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
769                                                         if (tempFASTAFileNames[i][j] != "") {
770                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
771                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
772
773                                                                 if(qFileName != ""){
774                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
775                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
776                                                                 }
777                                                                 if(nameFile != ""){
778                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
779                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
780                                                                 }
781                                                         }
782                                                 }
783                                         }
784                                 }
785                                                         
786                                 driverCreateTrim(filename,
787                                                                  qFileName,
788                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
789                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
790                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
791                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
792                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
793                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
794                                                                  (groupFile + toString(getpid()) + ".temp"),
795                                                                  tempFASTAFileNames,
796                                                                  tempPrimerQualFileNames,
797                                                                  tempNameFileNames,
798                                                                  lines[process],
799                                                                  qLines[process]);
800                                 
801                                 //pass groupCounts to parent
802                                 if(oligoFile != ""){
803                                         ofstream out;
804                                         string tempFile = filename + toString(getpid()) + ".num.temp";
805                                         m->openOutputFile(tempFile, out);
806                                         
807                                         out << groupCounts.size() << endl;
808                                         
809                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
810                                                 out << it->first << '\t' << it->second << endl;
811                                         }
812                                         out.close();
813                                 }
814                                 exit(0);
815                         }else { 
816                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
817                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
818                                 exit(0);
819                         }
820                 }
821                 
822                 //parent do my part
823                 ofstream temp;
824                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
825                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
826                 if(qFileName != ""){
827                         m->openOutputFile(trimQualFileName, temp);              temp.close();
828                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
829                 }
830                 if (nameFile != "") {
831                         m->openOutputFile(trimNameFileName, temp);              temp.close();
832                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
833                 }
834
835                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
836                 
837                 //force parent to wait until all the processes are done
838                 for (int i=0;i<processIDS.size();i++) { 
839                         int temp = processIDS[i];
840                         wait(&temp);
841                 }
842                 
843                 //append files
844                 for(int i=0;i<processIDS.size();i++){
845                         
846                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
847                         
848                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
849                         remove((trimFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
850                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
851                         remove((scrapFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
852                         
853                         if(qFileName != ""){
854                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
855                                 remove((trimQualFileName + toString(processIDS[i]) + ".temp").c_str());
856                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
857                                 remove((scrapQualFileName + toString(processIDS[i]) + ".temp").c_str());
858                         }
859                         
860                         if(nameFile != ""){
861                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
862                                 remove((trimNameFileName + toString(processIDS[i]) + ".temp").c_str());
863                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
864                                 remove((scrapNameFileName + toString(processIDS[i]) + ".temp").c_str());
865                         }
866                         
867                         if(oligoFile != ""){
868                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
869                                 remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
870                         }
871                         
872                         
873                         if(allFiles){
874                                 for(int j=0;j<fastaFileNames.size();j++){
875                                         for(int k=0;k<fastaFileNames[j].size();k++){
876                                                 if (fastaFileNames[j][k] != "") {
877                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
878                                                         remove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
879                                                         
880                                                         if(qFileName != ""){
881                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
882                                                                 remove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
883                                                         }
884                                                         
885                                                         if(nameFile != ""){
886                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
887                                                                 remove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
888                                                         }
889                                                 }
890                                         }
891                                 }
892                         }
893                         
894                         if(oligoFile != ""){
895                                 ifstream in;
896                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
897                                 m->openInputFile(tempFile, in);
898                                 int tempNum;
899                                 string group;
900                                 
901                                 in >> tempNum; m->gobble(in);
902                                 
903                                 if (tempNum != 0) {
904                                         while (!in.eof()) { 
905                                                 in >> group >> tempNum; m->gobble(in);
906                                 
907                                                 map<string, int>::iterator it = groupCounts.find(group);
908                                                 if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
909                                                 else { groupCounts[it->first] += tempNum; }
910                                         }
911                                 }
912                                 in.close(); remove(tempFile.c_str());
913                         }
914                         
915                 }
916         
917                 return exitCommand;
918 #endif          
919         }
920         catch(exception& e) {
921                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
922                 exit(1);
923         }
924 }
925
926 /**************************************************************************************************/
927
928 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
929         try {
930                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
931                 //set file positions for fasta file
932                 fastaFilePos = m->divideFile(filename, processors);
933                 
934                 if (qfilename == "") { return processors; }
935                 
936                 //get name of first sequence in each chunk
937                 map<string, int> firstSeqNames;
938                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
939                         ifstream in;
940                         m->openInputFile(filename, in);
941                         in.seekg(fastaFilePos[i]);
942                 
943                         Sequence temp(in); 
944                         firstSeqNames[temp.getName()] = i;
945                 
946                         in.close();
947                 }
948                                 
949                 //seach for filePos of each first name in the qfile and save in qfileFilePos
950                 ifstream inQual;
951                 m->openInputFile(qfilename, inQual);
952                 
953                 string input;
954                 while(!inQual.eof()){   
955                         input = m->getline(inQual);
956
957                         if (input.length() != 0) {
958                                 if(input[0] == '>'){ //this is a sequence name line
959                                         istringstream nameStream(input);
960                                         
961                                         string sname = "";  nameStream >> sname;
962                                         sname = sname.substr(1);
963                                         
964                                         map<string, int>::iterator it = firstSeqNames.find(sname);
965                                         
966                                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
967                                                 unsigned long int pos = inQual.tellg(); 
968                                                 qfileFilePos.push_back(pos - input.length() - 1);       
969                                                 firstSeqNames.erase(it);
970                                         }
971                                 }
972                         }
973                         
974                         if (firstSeqNames.size() == 0) { break; }
975                 }
976                 inQual.close();
977                 
978                 
979                 if (firstSeqNames.size() != 0) { 
980                         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
981                                 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
982                         }
983                         qFileName = "";
984                         return processors;
985                 }
986
987                 //get last file position of qfile
988                 FILE * pFile;
989                 unsigned long int size;
990                 
991                 //get num bytes in file
992                 pFile = fopen (qfilename.c_str(),"rb");
993                 if (pFile==NULL) perror ("Error opening file");
994                 else{
995                         fseek (pFile, 0, SEEK_END);
996                         size=ftell (pFile);
997                         fclose (pFile);
998                 }
999                 
1000                 qfileFilePos.push_back(size);
1001                 
1002                 return processors;
1003                 
1004                 #else
1005                 
1006                         fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1007                         //get last file position of fastafile
1008                         FILE * pFile;
1009                         unsigned long int size;
1010                         
1011                         //get num bytes in file
1012                         pFile = fopen (filename.c_str(),"rb");
1013                         if (pFile==NULL) perror ("Error opening file");
1014                         else{
1015                                 fseek (pFile, 0, SEEK_END);
1016                                 size=ftell (pFile);
1017                                 fclose (pFile);
1018                         }
1019                         fastaFilePos.push_back(size);
1020                         
1021                         //get last file position of fastafile
1022                         FILE * qFile;
1023                         
1024                         //get num bytes in file
1025                         qFile = fopen (qfilename.c_str(),"rb");
1026                         if (qFile==NULL) perror ("Error opening file");
1027                         else{
1028                                 fseek (qFile, 0, SEEK_END);
1029                                 size=ftell (qFile);
1030                                 fclose (qFile);
1031                         }
1032                         qfileFilePos.push_back(size);
1033                 
1034                         return 1;
1035                 
1036                 #endif
1037         }
1038         catch(exception& e) {
1039                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1040                 exit(1);
1041         }
1042 }
1043
1044 //***************************************************************************************************************
1045
1046 void TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1047         try {
1048                 ifstream inOligos;
1049                 m->openInputFile(oligoFile, inOligos);
1050                 
1051                 ofstream test;
1052                 
1053                 string type, oligo, group;
1054
1055                 int indexPrimer = 0;
1056                 int indexBarcode = 0;
1057                 
1058                 while(!inOligos.eof()){
1059
1060                         inOligos >> type; 
1061                                         
1062                         if(type[0] == '#'){
1063                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1064                                 m->gobble(inOligos);
1065                         }
1066                         else{
1067                                 m->gobble(inOligos);
1068                                 //make type case insensitive
1069                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1070                                 
1071                                 inOligos >> oligo;
1072                                 
1073                                 for(int i=0;i<oligo.length();i++){
1074                                         oligo[i] = toupper(oligo[i]);
1075                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1076                                 }
1077                                 
1078                                 if(type == "FORWARD"){
1079                                         group = "";
1080                                         
1081                                         // get rest of line in case there is a primer name
1082                                         while (!inOligos.eof()) {       
1083                                                 char c = inOligos.get(); 
1084                                                 if (c == 10 || c == 13){        break;  }
1085                                                 else if (c == 32 || c == 9){;} //space or tab
1086                                                 else {  group += c;  }
1087                                         } 
1088                                         
1089                                         //check for repeat barcodes
1090                                         map<string, int>::iterator itPrime = primers.find(oligo);
1091                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1092                                         
1093                                         primers[oligo]=indexPrimer; indexPrimer++;              
1094                                         primerNameVector.push_back(group);
1095                                 }
1096                                 else if(type == "REVERSE"){
1097                                         Sequence oligoRC("reverse", oligo);
1098                                         oligoRC.reverseComplement();
1099                                         revPrimer.push_back(oligoRC.getUnaligned());
1100                                 }
1101                                 else if(type == "BARCODE"){
1102                                         inOligos >> group;
1103                                         
1104                                         //check for repeat barcodes
1105                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1106                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1107                                                 
1108                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1109                                         barcodeNameVector.push_back(group);
1110                                 }
1111                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
1112                         }
1113                         m->gobble(inOligos);
1114                 }       
1115                 inOligos.close();
1116                 
1117                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1118                 
1119                 //add in potential combos
1120                 if(barcodeNameVector.size() == 0){
1121                         barcodes[""] = 0;
1122                         barcodeNameVector.push_back("");                        
1123                 }
1124                 
1125                 if(primerNameVector.size() == 0){
1126                         primers[""] = 0;
1127                         primerNameVector.push_back("");                 
1128                 }
1129                 
1130                 fastaFileNames.resize(barcodeNameVector.size());
1131                 for(int i=0;i<fastaFileNames.size();i++){
1132                         fastaFileNames[i].assign(primerNameVector.size(), "");
1133                 }
1134                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1135                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1136                 
1137                 if(allFiles){
1138                         set<string> uniqueNames; //used to cleanup outputFileNames
1139                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1140                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1141                                         
1142                                         string primerName = primerNameVector[itPrimer->second];
1143                                         string barcodeName = barcodeNameVector[itBar->second];
1144                                         
1145                                         string comboGroupName = "";
1146                                         string fastaFileName = "";
1147                                         string qualFileName = "";
1148                                         string nameFileName = "";
1149                                         
1150                                         if(primerName == ""){
1151                                                 comboGroupName = barcodeNameVector[itBar->second];
1152                                         }
1153                                         else{
1154                                                 if(barcodeName == ""){
1155                                                         comboGroupName = primerNameVector[itPrimer->second];
1156                                                 }
1157                                                 else{
1158                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1159                                                 }
1160                                         }
1161                                         
1162                                         
1163                                         ofstream temp;
1164                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1165                                         if (uniqueNames.count(fastaFileName) == 0) {
1166                                                 outputNames.push_back(fastaFileName);
1167                                                 outputTypes["fasta"].push_back(fastaFileName);
1168                                                 uniqueNames.insert(fastaFileName);
1169                                         }
1170                                         
1171                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1172                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1173                                         
1174                                         if(qFileName != ""){
1175                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1176                                                 if (uniqueNames.count(qualFileName) == 0) {
1177                                                         outputNames.push_back(qualFileName);
1178                                                         outputTypes["qfile"].push_back(qualFileName);
1179                                                 }
1180                                                 
1181                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1182                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1183                                         }
1184                                         
1185                                         if(nameFile != ""){
1186                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1187                                                 if (uniqueNames.count(nameFileName) == 0) {
1188                                                         outputNames.push_back(nameFileName);
1189                                                         outputTypes["name"].push_back(nameFileName);
1190                                                 }
1191                                                 
1192                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1193                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1194                                         }
1195                                         
1196                                 }
1197                         }
1198                 }
1199                 numFPrimers = primers.size();
1200                 numRPrimers = revPrimer.size();
1201
1202         }
1203         catch(exception& e) {
1204                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1205                 exit(1);
1206         }
1207 }
1208
1209 //***************************************************************************************************************
1210
1211 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
1212         try {
1213                 
1214                 string rawSequence = seq.getUnaligned();
1215                 int success = bdiffs + 1;       //guilty until proven innocent
1216                 
1217                 //can you find the barcode
1218                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1219                         string oligo = it->first;
1220                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
1221                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1222                                 break;  
1223                         }
1224                         
1225                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1226                                 group = it->second;
1227                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1228                                 
1229                                 if(qual.getName() != ""){
1230                                         qual.trimQScores(oligo.length(), -1);
1231                                 }
1232                                 
1233                                 success = 0;
1234                                 break;
1235                         }
1236                 }
1237                 
1238                 //if you found the barcode or if you don't want to allow for diffs
1239                 if ((bdiffs == 0) || (success == 0)) { return success;  }
1240                 
1241                 else { //try aligning and see if you can find it
1242
1243                         int maxLength = 0;
1244
1245                         Alignment* alignment;
1246                         if (barcodes.size() > 0) {
1247                                 map<string,int>::iterator it=barcodes.begin();
1248
1249                                 for(it;it!=barcodes.end();it++){
1250                                         if(it->first.length() > maxLength){
1251                                                 maxLength = it->first.length();
1252                                         }
1253                                 }
1254                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
1255
1256                         }else{ alignment = NULL; } 
1257                         
1258                         //can you find the barcode
1259                         int minDiff = 1e6;
1260                         int minCount = 1;
1261                         int minGroup = -1;
1262                         int minPos = 0;
1263                         
1264                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1265                                 string oligo = it->first;
1266 //                              int length = oligo.length();
1267                                 
1268                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
1269                                         success = bdiffs + 10;
1270                                         break;
1271                                 }
1272                                 
1273                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1274                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1275                                 oligo = alignment->getSeqAAln();
1276                                 string temp = alignment->getSeqBAln();
1277                 
1278                                 int alnLength = oligo.length();
1279                                 
1280                                 for(int i=oligo.length()-1;i>=0;i--){
1281                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1282                                 }
1283                                 oligo = oligo.substr(0,alnLength);
1284                                 temp = temp.substr(0,alnLength);
1285                                 
1286                                 int numDiff = countDiffs(oligo, temp);
1287                                 
1288                                 if(numDiff < minDiff){
1289                                         minDiff = numDiff;
1290                                         minCount = 1;
1291                                         minGroup = it->second;
1292                                         minPos = 0;
1293                                         for(int i=0;i<alnLength;i++){
1294                                                 if(temp[i] != '-'){
1295                                                         minPos++;
1296                                                 }
1297                                         }
1298                                 }
1299                                 else if(numDiff == minDiff){
1300                                         minCount++;
1301                                 }
1302
1303                         }
1304
1305                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
1306                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1307                         else{                                                                                                   //use the best match
1308                                 group = minGroup;
1309                                 seq.setUnaligned(rawSequence.substr(minPos));
1310                                 
1311                                 if(qual.getName() != ""){
1312                                         qual.trimQScores(minPos, -1);
1313                                 }
1314                                 success = minDiff;
1315                         }
1316                         
1317                         if (alignment != NULL) {  delete alignment;  }
1318                         
1319                 }
1320         
1321                 return success;
1322                 
1323         }
1324         catch(exception& e) {
1325                 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
1326                 exit(1);
1327         }
1328
1329 }
1330
1331 //***************************************************************************************************************
1332
1333 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
1334         try {
1335                 string rawSequence = seq.getUnaligned();
1336                 int success = pdiffs + 1;       //guilty until proven innocent
1337                 
1338                 //can you find the primer
1339                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1340                         string oligo = it->first;
1341                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
1342                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1343                                 break;  
1344                         }
1345                         
1346                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1347                                 group = it->second;
1348                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1349                                 if(qual.getName() != ""){
1350                                         qual.trimQScores(oligo.length(), -1);
1351                                 }
1352                                 success = 0;
1353                                 break;
1354                         }
1355                 }
1356
1357                 //if you found the barcode or if you don't want to allow for diffs
1358                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1359                 
1360                 else { //try aligning and see if you can find it
1361
1362                         int maxLength = 0;
1363
1364                         Alignment* alignment;
1365                         if (primers.size() > 0) {
1366                                 map<string,int>::iterator it=primers.begin();
1367
1368                                 for(it;it!=primers.end();it++){
1369                                         if(it->first.length() > maxLength){
1370                                                 maxLength = it->first.length();
1371                                         }
1372                                 }
1373                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
1374
1375                         }else{ alignment = NULL; } 
1376                         
1377                         //can you find the barcode
1378                         int minDiff = 1e6;
1379                         int minCount = 1;
1380                         int minGroup = -1;
1381                         int minPos = 0;
1382                         
1383                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1384                                 string oligo = it->first;
1385 //                              int length = oligo.length();
1386                                 
1387                                 if(rawSequence.length() < maxLength){   
1388                                         success = pdiffs + 100;
1389                                         break;
1390                                 }
1391                                 
1392                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1393                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1394                                 oligo = alignment->getSeqAAln();
1395                                 string temp = alignment->getSeqBAln();
1396                 
1397                                 int alnLength = oligo.length();
1398                                 
1399                                 for(int i=oligo.length()-1;i>=0;i--){
1400                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1401                                 }
1402                                 oligo = oligo.substr(0,alnLength);
1403                                 temp = temp.substr(0,alnLength);
1404                                 
1405                                 int numDiff = countDiffs(oligo, temp);
1406                                 
1407                                 if(numDiff < minDiff){
1408                                         minDiff = numDiff;
1409                                         minCount = 1;
1410                                         minGroup = it->second;
1411                                         minPos = 0;
1412                                         for(int i=0;i<alnLength;i++){
1413                                                 if(temp[i] != '-'){
1414                                                         minPos++;
1415                                                 }
1416                                         }
1417                                 }
1418                                 else if(numDiff == minDiff){
1419                                         minCount++;
1420                                 }
1421
1422                         }
1423
1424                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1425                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1426                         else{                                                                                                   //use the best match
1427                                 group = minGroup;
1428                                 seq.setUnaligned(rawSequence.substr(minPos));
1429                                 if(qual.getName() != ""){
1430                                         qual.trimQScores(minPos, -1);
1431                                 }
1432                                 success = minDiff;
1433                         }
1434                         
1435                         if (alignment != NULL) {  delete alignment;  }
1436                         
1437                 }
1438                 
1439                 return success;
1440
1441         }
1442         catch(exception& e) {
1443                 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1444                 exit(1);
1445         }
1446 }
1447
1448 //***************************************************************************************************************
1449
1450 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1451         try {
1452                 string rawSequence = seq.getUnaligned();
1453                 bool success = 0;       //guilty until proven innocent
1454                 
1455                 for(int i=0;i<numRPrimers;i++){
1456                         string oligo = revPrimer[i];
1457                         
1458                         if(rawSequence.length() < oligo.length()){
1459                                 success = 0;
1460                                 break;
1461                         }
1462                         
1463                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1464                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1465                                 if(qual.getName() != ""){
1466                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
1467                                 }
1468                                 success = 1;
1469                                 break;
1470                         }
1471                 }       
1472                 return success;
1473                 
1474         }
1475         catch(exception& e) {
1476                 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1477                 exit(1);
1478         }
1479 }
1480
1481 //***************************************************************************************************************
1482
1483 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1484         try {
1485                 bool success = 1;
1486                 if(qscores.getName() != ""){
1487                         qscores.trimQScores(-1, keepFirst);
1488                 }
1489                 sequence.trim(keepFirst);
1490                 return success;
1491         }
1492         catch(exception& e) {
1493                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1494                 exit(1);
1495         }
1496         
1497 }       
1498
1499 //***************************************************************************************************************
1500
1501 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1502         try {
1503                 bool success = 0;
1504                 
1505                 int length = sequence.getNumBases() - removeLast;
1506                 
1507                 if(length > 0){
1508                         if(qscores.getName() != ""){
1509                                 qscores.trimQScores(-1, length);
1510                         }
1511                         sequence.trim(length);
1512                         success = 1;
1513                 }
1514                 else{
1515                         success = 0;
1516                 }
1517
1518                 return success;
1519         }
1520         catch(exception& e) {
1521                 m->errorOut(e, "removeLastTrim", "countDiffs");
1522                 exit(1);
1523         }
1524         
1525 }       
1526
1527 //***************************************************************************************************************
1528
1529 bool TrimSeqsCommand::cullLength(Sequence& seq){
1530         try {
1531         
1532                 int length = seq.getNumBases();
1533                 bool success = 0;       //guilty until proven innocent
1534                 
1535                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1536                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1537                 else                                                                                            {       success = 0;    }
1538                 
1539                 return success;
1540         
1541         }
1542         catch(exception& e) {
1543                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1544                 exit(1);
1545         }
1546         
1547 }
1548
1549 //***************************************************************************************************************
1550
1551 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1552         try {
1553                 int longHomoP = seq.getLongHomoPolymer();
1554                 bool success = 0;       //guilty until proven innocent
1555                 
1556                 if(longHomoP <= maxHomoP){      success = 1;    }
1557                 else                                    {       success = 0;    }
1558                 
1559                 return success;
1560         }
1561         catch(exception& e) {
1562                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1563                 exit(1);
1564         }
1565         
1566 }
1567
1568 //***************************************************************************************************************
1569
1570 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1571         try {
1572                 int numNs = seq.getAmbigBases();
1573                 bool success = 0;       //guilty until proven innocent
1574                 
1575                 if(numNs <= maxAmbig)   {       success = 1;    }
1576                 else                                    {       success = 0;    }
1577                 
1578                 return success;
1579         }
1580         catch(exception& e) {
1581                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1582                 exit(1);
1583         }
1584         
1585 }
1586
1587 //***************************************************************************************************************
1588
1589 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1590         try {
1591                 bool success = 1;
1592                 int length = oligo.length();
1593                 
1594                 for(int i=0;i<length;i++){
1595                         
1596                         if(oligo[i] != seq[i]){
1597                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
1598                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
1599                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
1600                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
1601                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
1602                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
1603                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
1604                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
1605                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1606                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1607                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
1608                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
1609                                 
1610                                 if(success == 0)        {       break;   }
1611                         }
1612                         else{
1613                                 success = 1;
1614                         }
1615                 }
1616                 
1617                 return success;
1618         }
1619         catch(exception& e) {
1620                 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1621                 exit(1);
1622         }
1623
1624 }
1625
1626 //***************************************************************************************************************
1627
1628 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1629         try {
1630
1631                 int length = oligo.length();
1632                 int countDiffs = 0;
1633                 
1634                 for(int i=0;i<length;i++){
1635                                                                 
1636                         if(oligo[i] != seq[i]){
1637                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
1638                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
1639                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
1640                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
1641                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
1642                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
1643                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
1644                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
1645                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1646                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1647                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
1648                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
1649                         }
1650                         
1651                 }
1652                 
1653                 return countDiffs;
1654         }
1655         catch(exception& e) {
1656                 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1657                 exit(1);
1658         }
1659
1660 }
1661
1662 //***************************************************************************************************************