]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
changed normalize.shared and sub.sample commands to fix bug in normalize.shared and...
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12
13 //**********************************************************************************************************************
14 vector<string> TrimSeqsCommand::setParameters(){        
15         try {
16                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
17                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
18                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
19                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
20                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
21                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
22                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
23                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
24                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
25                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
26                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
27                 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
28                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
29                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
30                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
31                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
32                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
33                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
34                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
35                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
36                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
37                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
38                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
39                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
40                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
41                         
42                 vector<string> myArray;
43                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
44                 return myArray;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 string TrimSeqsCommand::getHelpString(){        
53         try {
54                 string helpString = "";
55                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
56                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
57                 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
58                 helpString += "The fasta parameter is required.\n";
59                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
60                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
61                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
62                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
63                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
64                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
65                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
66                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n";
67                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
68                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
69                 helpString += "The qfile parameter allows you to provide a quality file.\n";
70                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
71                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
72                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
73                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
74                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
75                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
76                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
77                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
78                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
79                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
80                 helpString += "The trim.seqs command should be in the following format: \n";
81                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
82                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
83                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
84                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
85                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
86                 return helpString;
87         }
88         catch(exception& e) {
89                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
90                 exit(1);
91         }
92 }
93
94
95 //**********************************************************************************************************************
96
97 TrimSeqsCommand::TrimSeqsCommand(){     
98         try {
99                 abort = true; calledHelp = true; 
100                 setParameters();
101                 vector<string> tempOutNames;
102                 outputTypes["fasta"] = tempOutNames;
103                 outputTypes["qfile"] = tempOutNames;
104                 outputTypes["group"] = tempOutNames;
105                 outputTypes["name"] = tempOutNames;
106         }
107         catch(exception& e) {
108                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
109                 exit(1);
110         }
111 }
112 //***************************************************************************************************************
113
114 TrimSeqsCommand::TrimSeqsCommand(string option)  {
115         try {
116                 
117                 abort = false; calledHelp = false;   
118                 comboStarts = 0;
119                 
120                 //allow user to run help
121                 if(option == "help") { help(); abort = true; calledHelp = true; }
122                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
123                 
124                 else {
125                         vector<string> myArray = setParameters();
126                         
127                         OptionParser parser(option);
128                         map<string,string> parameters = parser.getParameters();
129                         
130                         ValidParameters validParameter;
131                         map<string,string>::iterator it;
132                         
133                         //check to make sure all parameters are valid for command
134                         for (it = parameters.begin(); it != parameters.end(); it++) { 
135                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
136                         }
137                         
138                         //initialize outputTypes
139                         vector<string> tempOutNames;
140                         outputTypes["fasta"] = tempOutNames;
141                         outputTypes["qfile"] = tempOutNames;
142                         outputTypes["group"] = tempOutNames;
143                         outputTypes["name"] = tempOutNames;
144                         
145                         //if the user changes the input directory command factory will send this info to us in the output parameter 
146                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
147                         if (inputDir == "not found"){   inputDir = "";          }
148                         else {
149                                 string path;
150                                 it = parameters.find("fasta");
151                                 //user has given a template file
152                                 if(it != parameters.end()){ 
153                                         path = m->hasPath(it->second);
154                                         //if the user has not given a path then, add inputdir. else leave path alone.
155                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
156                                 }
157                                 
158                                 it = parameters.find("oligos");
159                                 //user has given a template file
160                                 if(it != parameters.end()){ 
161                                         path = m->hasPath(it->second);
162                                         //if the user has not given a path then, add inputdir. else leave path alone.
163                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
164                                 }
165                                 
166                                 it = parameters.find("qfile");
167                                 //user has given a template file
168                                 if(it != parameters.end()){ 
169                                         path = m->hasPath(it->second);
170                                         //if the user has not given a path then, add inputdir. else leave path alone.
171                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
172                                 }
173                                 
174                                 it = parameters.find("name");
175                                 //user has given a template file
176                                 if(it != parameters.end()){ 
177                                         path = m->hasPath(it->second);
178                                         //if the user has not given a path then, add inputdir. else leave path alone.
179                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
180                                 }
181                                 
182                         }
183
184                         
185                         //check for required parameters
186                         fastaFile = validParameter.validFile(parameters, "fasta", true);
187                         if (fastaFile == "not found") {                                 
188                                 fastaFile = m->getFastaFile(); 
189                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
190                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
191                         }else if (fastaFile == "not open") { abort = true; }    
192                         else { m->setFastaFile(fastaFile); }
193                         
194                         //if the user changes the output directory command factory will send this info to us in the output parameter 
195                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
196                                 outputDir = ""; 
197                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
198                         }
199                 
200                         
201                         //check for optional parameter and set defaults
202                         // ...at some point should added some additional type checking...
203                         string temp;
204                         temp = validParameter.validFile(parameters, "flip", false);
205                         if (temp == "not found"){       flip = 0;       }
206                         else if(m->isTrue(temp))        {       flip = 1;       }
207                 
208                         temp = validParameter.validFile(parameters, "oligos", true);
209                         if (temp == "not found"){       oligoFile = "";         }
210                         else if(temp == "not open"){    abort = true;   } 
211                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
212                         
213                         
214                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
215                         convert(temp, maxAmbig);  
216
217                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
218                         convert(temp, maxHomoP);  
219
220                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
221                         convert(temp, minLength); 
222                         
223                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
224                         convert(temp, maxLength);
225                         
226                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
227                         convert(temp, bdiffs);
228                         
229                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
230                         convert(temp, pdiffs);
231                         
232                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
233                         convert(temp, tdiffs);
234                         
235                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
236                         
237                         temp = validParameter.validFile(parameters, "qfile", true);     
238                         if (temp == "not found")        {       qFileName = "";         }
239                         else if(temp == "not open")     {       abort = true;           }
240                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
241                         
242                         temp = validParameter.validFile(parameters, "name", true);      
243                         if (temp == "not found")        {       nameFile = "";          }
244                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
245                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
246                         
247                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
248                         convert(temp, qThreshold);
249                         
250                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
251                         qtrim = m->isTrue(temp);
252
253                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
254                         convert(temp, qRollAverage);
255
256                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
257                         convert(temp, qWindowAverage);
258
259                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
260                         convert(temp, qWindowSize);
261
262                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
263                         convert(temp, qWindowStep);
264
265                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
266                         convert(temp, qAverage);
267
268                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
269                         convert(temp, keepFirst);
270
271                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
272                         convert(temp, removeLast);
273                         
274                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
275                         allFiles = m->isTrue(temp);
276                         
277                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
278                         m->setProcessors(temp);
279                         convert(temp, processors); 
280                         
281                         
282                         if(allFiles && (oligoFile == "")){
283                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
284                         }
285                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
286                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
287                                 qAverage=0;
288                                 qThreshold=0;
289                         }
290                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
291                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
292                                 abort = true;
293                         }
294                 }
295
296         }
297         catch(exception& e) {
298                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
299                 exit(1);
300         }
301 }
302 //***************************************************************************************************************
303
304 int TrimSeqsCommand::execute(){
305         try{
306         
307                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
308                 
309                 numFPrimers = 0;  //this needs to be initialized
310                 numRPrimers = 0;
311                 createGroup = false;
312                 vector<vector<string> > fastaFileNames;
313                 vector<vector<string> > qualFileNames;
314                 vector<vector<string> > nameFileNames;
315                 
316                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
317                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
318                 
319                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
320                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
321                 
322                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
323                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
324                 
325                 if (qFileName != "") {
326                         outputNames.push_back(trimQualFile);
327                         outputNames.push_back(scrapQualFile);
328                         outputTypes["qfile"].push_back(trimQualFile);
329                         outputTypes["qfile"].push_back(scrapQualFile); 
330                 }
331                 
332                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
333                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
334                 
335                 if (nameFile != "") {
336                         m->readNames(nameFile, nameMap);
337                         outputNames.push_back(trimNameFile);
338                         outputNames.push_back(scrapNameFile);
339                         outputTypes["name"].push_back(trimNameFile);
340                         outputTypes["name"].push_back(scrapNameFile); 
341                 }
342                 
343                 if (m->control_pressed) { return 0; }
344                 
345                 string outputGroupFileName;
346                 if(oligoFile != ""){
347                         createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
348                         if (createGroup) {
349                                 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
350                                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
351                         }
352                 }
353                 
354                 vector<unsigned long int> fastaFilePos;
355                 vector<unsigned long int> qFilePos;
356                 
357                 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
358                 
359                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
360                         lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
361                         if (qFileName != "") {  qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)]));  }
362                 }       
363                 if(qFileName == "")     {       qLines = lines; } //files with duds
364                 
365                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
366                                 if(processors == 1){
367                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
368                                 }else{
369                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
370                                 }       
371                 #else
372                                 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
373                 #endif
374                 
375                 if (m->control_pressed) {  return 0; }                  
376         
377                 if(allFiles){
378                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
379                         map<string, string>::iterator it;
380                         set<string> namesToRemove;
381                         for(int i=0;i<fastaFileNames.size();i++){
382                                 for(int j=0;j<fastaFileNames[0].size();j++){
383                                         if (fastaFileNames[i][j] != "") {
384                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
385                                                         if(m->isBlank(fastaFileNames[i][j])){
386                                                                 m->mothurRemove(fastaFileNames[i][j]);
387                                                                 namesToRemove.insert(fastaFileNames[i][j]);
388                                                         
389                                                                 if(qFileName != ""){
390                                                                         m->mothurRemove(qualFileNames[i][j]);
391                                                                         namesToRemove.insert(qualFileNames[i][j]);
392                                                                 }
393                                                                 
394                                                                 if(nameFile != ""){
395                                                                         m->mothurRemove(nameFileNames[i][j]);
396                                                                         namesToRemove.insert(nameFileNames[i][j]);
397                                                                 }
398                                                         }else{  
399                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
400                                                                 if (it == uniqueFastaNames.end()) {     
401                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
402                                                                 }       
403                                                         }
404                                                 }
405                                         }
406                                 }
407                         }
408                         
409                         //remove names for outputFileNames, just cleans up the output
410                         vector<string> outputNames2;
411                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
412                         outputNames = outputNames2;
413                         
414                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
415                                 ifstream in;
416                                 m->openInputFile(it->first, in);
417                                 
418                                 ofstream out;
419                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
420                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
421                                 m->openOutputFile(thisGroupName, out);
422                                 
423                                 while (!in.eof()){
424                                         if (m->control_pressed) { break; }
425                                         
426                                         Sequence currSeq(in); m->gobble(in);
427                                         out << currSeq.getName() << '\t' << it->second << endl;
428                                 }
429                                 in.close();
430                                 out.close();
431                         }
432                 }
433                 
434                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
435
436                 //output group counts
437                 m->mothurOutEndLine();
438                 int total = 0;
439                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
440                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
441                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
442                 }
443                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
444                 
445                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
446
447                 //set fasta file as new current fastafile
448                 string current = "";
449                 itTypes = outputTypes.find("fasta");
450                 if (itTypes != outputTypes.end()) {
451                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
452                 }
453                 
454                 itTypes = outputTypes.find("name");
455                 if (itTypes != outputTypes.end()) {
456                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
457                 }
458                 
459                 itTypes = outputTypes.find("qfile");
460                 if (itTypes != outputTypes.end()) {
461                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
462                 }
463                 
464                 itTypes = outputTypes.find("group");
465                 if (itTypes != outputTypes.end()) {
466                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
467                 }
468
469                 m->mothurOutEndLine();
470                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
471                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
472                 m->mothurOutEndLine();
473                 
474                 return 0;       
475                         
476         }
477         catch(exception& e) {
478                 m->errorOut(e, "TrimSeqsCommand", "execute");
479                 exit(1);
480         }
481 }
482                 
483 /**************************************************************************************/
484
485 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair* line, linePair* qline) {    
486                 
487         try {
488                 
489                 ofstream trimFASTAFile;
490                 m->openOutputFile(trimFileName, trimFASTAFile);
491                 
492                 ofstream scrapFASTAFile;
493                 m->openOutputFile(scrapFileName, scrapFASTAFile);
494                 
495                 ofstream trimQualFile;
496                 ofstream scrapQualFile;
497                 if(qFileName != ""){
498                         m->openOutputFile(trimQFileName, trimQualFile);
499                         m->openOutputFile(scrapQFileName, scrapQualFile);
500                 }
501                 
502                 ofstream trimNameFile;
503                 ofstream scrapNameFile;
504                 if(nameFile != ""){
505                         m->openOutputFile(trimNFileName, trimNameFile);
506                         m->openOutputFile(scrapNFileName, scrapNameFile);
507                 }
508                 
509                 
510                 ofstream outGroupsFile;
511                 if (createGroup){       m->openOutputFile(groupFileName, outGroupsFile);   }
512                 if(allFiles){
513                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
514                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
515                                         if (fastaFileNames[i][j] != "") {
516                                                 ofstream temp;
517                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
518                                                 if(qFileName != ""){
519                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
520                                                 }
521                                                 
522                                                 if(nameFile != ""){
523                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
524                                                 }
525                                         }
526                                 }
527                         }
528                 }
529                 
530                 ifstream inFASTA;
531                 m->openInputFile(filename, inFASTA);
532                 inFASTA.seekg(line->start);
533                 
534                 ifstream qFile;
535                 if(qFileName != "")     {
536                         m->openInputFile(qFileName, qFile);
537                         qFile.seekg(qline->start);  
538                 }
539                 
540                 int count = 0;
541                 bool moreSeqs = 1;
542         
543                 while (moreSeqs) {
544                                 
545                         if (m->control_pressed) { 
546                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
547                                 if (createGroup) {       outGroupsFile.close();   }
548
549                                 if(qFileName != ""){
550                                         qFile.close();
551                                 }
552                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }
553
554                                 return 0;
555                         }
556                         
557                         int success = 1;
558                         string trashCode = "";
559                         int currentSeqsDiffs = 0;
560
561                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
562                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
563                         QualityScores currQual;
564                         if(qFileName != ""){
565                                 currQual = QualityScores(qFile);  m->gobble(qFile);
566                         }
567                         
568                         string origSeq = currSeq.getUnaligned();
569                         if (origSeq != "") {
570                                 
571                                 int barcodeIndex = 0;
572                                 int primerIndex = 0;
573                                 
574                                 if(barcodes.size() != 0){
575                                         success = stripBarcode(currSeq, currQual, barcodeIndex);
576                                         if(success > bdiffs)            {       trashCode += 'b';       }
577                                         else{ currentSeqsDiffs += success;  }
578                                 }
579                                 
580                                 if(numFPrimers != 0){
581                                         success = stripForward(currSeq, currQual, primerIndex);
582                                         if(success > pdiffs)            {       trashCode += 'f';       }
583                                         else{ currentSeqsDiffs += success;  }
584                                 }
585                                 
586                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
587                                 
588                                 if(numRPrimers != 0){
589                                         success = stripReverse(currSeq, currQual);
590                                         if(!success)                            {       trashCode += 'r';       }
591                                 }
592
593                                 if(keepFirst != 0){
594                                         success = keepFirstTrim(currSeq, currQual);
595                                 }
596                                 
597                                 if(removeLast != 0){
598                                         success = removeLastTrim(currSeq, currQual);
599                                         if(!success)                            {       trashCode += 'l';       }
600                                 }
601
602                                 
603                                 if(qFileName != ""){
604                                         int origLength = currSeq.getNumBases();
605                                         
606                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
607                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
608                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
609                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
610                                         else                                            {       success = 1;                            }
611                                         
612                                         //you don't want to trim, if it fails above then scrap it
613                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
614                                         
615                                         if(!success)                            {       trashCode += 'q';       }
616                                 }                               
617                 
618                                 if(minLength > 0 || maxLength > 0){
619                                         success = cullLength(currSeq);
620                                         if(!success)                            {       trashCode += 'l';       }
621                                 }
622                                 if(maxHomoP > 0){
623                                         success = cullHomoP(currSeq);
624                                         if(!success)                            {       trashCode += 'h';       }
625                                 }
626                                 if(maxAmbig != -1){
627                                         success = cullAmbigs(currSeq);
628                                         if(!success)                            {       trashCode += 'n';       }
629                                 }
630                                 
631                                 if(flip){               // should go last                       
632                                         currSeq.reverseComplement();
633                                         if(qFileName != ""){
634                                                 currQual.flipQScores(); 
635                                         }
636                                 }
637                                 
638                                 if(trashCode.length() == 0){
639                                         currSeq.setAligned(currSeq.getUnaligned());
640                                         currSeq.printSequence(trimFASTAFile);
641                                         
642                                         if(qFileName != ""){
643                                                 currQual.printQScores(trimQualFile);
644                                         }
645                                         
646                                         if(nameFile != ""){
647                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
648                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
649                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
650                                         }
651                                         
652                                         if (createGroup) {
653                                                 if(barcodes.size() != 0){
654                                                         string thisGroup = barcodeNameVector[barcodeIndex];
655                                                         if (primers.size() != 0) { 
656                                                                 if (primerNameVector[primerIndex] != "") { 
657                                                                         if(thisGroup != "") {
658                                                                                 thisGroup += "." + primerNameVector[primerIndex]; 
659                                                                         }else {
660                                                                                 thisGroup = primerNameVector[primerIndex]; 
661                                                                         }
662                                                                 } 
663                                                         }
664                                                         
665                                                         outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
666                                                         
667                                                         if (nameFile != "") {
668                                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
669                                                                 if (itName != nameMap.end()) { 
670                                                                         vector<string> thisSeqsNames; 
671                                                                         m->splitAtChar(itName->second, thisSeqsNames, ',');
672                                                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
673                                                                                 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
674                                                                         }
675                                                                 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
676                                                         }
677                                                         
678                                                         map<string, int>::iterator it = groupCounts.find(thisGroup);
679                                                         if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
680                                                         else { groupCounts[it->first]++; }
681                                                                 
682                                                 }
683                                         }
684                                         
685                                         if(allFiles){
686                                                 ofstream output;
687                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
688                                                 currSeq.printSequence(output);
689                                                 output.close();
690                                                 
691                                                 if(qFileName != ""){
692                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
693                                                         currQual.printQScores(output);
694                                                         output.close();                                                 
695                                                 }
696                                                 
697                                                 if(nameFile != ""){
698                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
699                                                         if (itName != nameMap.end()) { 
700                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
701                                                                 output << itName->first << '\t' << itName->second << endl; 
702                                                                 output.close();
703                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
704                                                 }
705                                         }
706                                 }
707                                 else{
708                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
709                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
710                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
711                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
712                                         }
713                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
714                                         currSeq.setUnaligned(origSeq);
715                                         currSeq.setAligned(origSeq);
716                                         currSeq.printSequence(scrapFASTAFile);
717                                         if(qFileName != ""){
718                                                 currQual.printQScores(scrapQualFile);
719                                         }
720                                 }
721                                 count++;
722                         }
723                         
724                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
725                                 unsigned long int pos = inFASTA.tellg();
726                                 if ((pos == -1) || (pos >= line->end)) { break; }
727                         
728                         #else
729                                 if (inFASTA.eof()) { break; }
730                         #endif
731                         
732                         //report progress
733                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
734                         
735                 }
736                 //report progress
737                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
738                 
739                 
740                 inFASTA.close();
741                 trimFASTAFile.close();
742                 scrapFASTAFile.close();
743                 if (createGroup) {       outGroupsFile.close();   }
744                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
745                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
746                 
747                 return count;
748         }
749         catch(exception& e) {
750                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
751                 exit(1);
752         }
753 }
754
755 /**************************************************************************************************/
756
757 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
758         try {
759 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
760                 int process = 1;
761                 int exitCommand = 1;
762                 processIDS.clear();
763                 
764                 //loop through and create all the processes you want
765                 while (process != processors) {
766                         int pid = fork();
767                         
768                         if (pid > 0) {
769                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
770                                 process++;
771                         }else if (pid == 0){
772                                 
773                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
774                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
775                                 vector<vector<string> > tempNameFileNames = nameFileNames;
776
777                                 if(allFiles){
778                                         ofstream temp;
779
780                                         for(int i=0;i<tempFASTAFileNames.size();i++){
781                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
782                                                         if (tempFASTAFileNames[i][j] != "") {
783                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
784                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
785
786                                                                 if(qFileName != ""){
787                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
788                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
789                                                                 }
790                                                                 if(nameFile != ""){
791                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
792                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
793                                                                 }
794                                                         }
795                                                 }
796                                         }
797                                 }
798                                                         
799                                 driverCreateTrim(filename,
800                                                                  qFileName,
801                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
802                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
803                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
804                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
805                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
806                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
807                                                                  (groupFile + toString(getpid()) + ".temp"),
808                                                                  tempFASTAFileNames,
809                                                                  tempPrimerQualFileNames,
810                                                                  tempNameFileNames,
811                                                                  lines[process],
812                                                                  qLines[process]);
813                                 
814                                 //pass groupCounts to parent
815                                 if(createGroup){
816                                         ofstream out;
817                                         string tempFile = filename + toString(getpid()) + ".num.temp";
818                                         m->openOutputFile(tempFile, out);
819                                         
820                                         out << groupCounts.size() << endl;
821                                         
822                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
823                                                 out << it->first << '\t' << it->second << endl;
824                                         }
825                                         out.close();
826                                 }
827                                 exit(0);
828                         }else { 
829                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
830                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
831                                 exit(0);
832                         }
833                 }
834                 
835                 //parent do my part
836                 ofstream temp;
837                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
838                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
839                 if(qFileName != ""){
840                         m->openOutputFile(trimQualFileName, temp);              temp.close();
841                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
842                 }
843                 if (nameFile != "") {
844                         m->openOutputFile(trimNameFileName, temp);              temp.close();
845                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
846                 }
847
848                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
849                 
850                 //force parent to wait until all the processes are done
851                 for (int i=0;i<processIDS.size();i++) { 
852                         int temp = processIDS[i];
853                         wait(&temp);
854                 }
855                 
856                 //append files
857                 for(int i=0;i<processIDS.size();i++){
858                         
859                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
860                         
861                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
862                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
863                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
864                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
865                         
866                         if(qFileName != ""){
867                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
868                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
869                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
870                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
871                         }
872                         
873                         if(nameFile != ""){
874                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
875                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
876                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
877                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
878                         }
879                         
880                         if(createGroup){
881                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
882                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
883                         }
884                         
885                         
886                         if(allFiles){
887                                 for(int j=0;j<fastaFileNames.size();j++){
888                                         for(int k=0;k<fastaFileNames[j].size();k++){
889                                                 if (fastaFileNames[j][k] != "") {
890                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
891                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
892                                                         
893                                                         if(qFileName != ""){
894                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
895                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
896                                                         }
897                                                         
898                                                         if(nameFile != ""){
899                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
900                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
901                                                         }
902                                                 }
903                                         }
904                                 }
905                         }
906                         
907                         if(createGroup){
908                                 ifstream in;
909                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
910                                 m->openInputFile(tempFile, in);
911                                 int tempNum;
912                                 string group;
913                                 
914                                 in >> tempNum; m->gobble(in);
915                                 
916                                 if (tempNum != 0) {
917                                         while (!in.eof()) { 
918                                                 in >> group >> tempNum; m->gobble(in);
919                                 
920                                                 map<string, int>::iterator it = groupCounts.find(group);
921                                                 if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
922                                                 else { groupCounts[it->first] += tempNum; }
923                                         }
924                                 }
925                                 in.close(); m->mothurRemove(tempFile);
926                         }
927                         
928                 }
929         
930                 return exitCommand;
931 #endif          
932         }
933         catch(exception& e) {
934                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
935                 exit(1);
936         }
937 }
938
939 /**************************************************************************************************/
940
941 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
942         try {
943                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
944                 //set file positions for fasta file
945                 fastaFilePos = m->divideFile(filename, processors);
946                 
947                 if (qfilename == "") { return processors; }
948                 
949                 //get name of first sequence in each chunk
950                 map<string, int> firstSeqNames;
951                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
952                         ifstream in;
953                         m->openInputFile(filename, in);
954                         in.seekg(fastaFilePos[i]);
955                 
956                         Sequence temp(in); 
957                         firstSeqNames[temp.getName()] = i;
958                 
959                         in.close();
960                 }
961                                 
962                 //seach for filePos of each first name in the qfile and save in qfileFilePos
963                 ifstream inQual;
964                 m->openInputFile(qfilename, inQual);
965                 
966                 string input;
967                 while(!inQual.eof()){   
968                         input = m->getline(inQual);
969
970                         if (input.length() != 0) {
971                                 if(input[0] == '>'){ //this is a sequence name line
972                                         istringstream nameStream(input);
973                                         
974                                         string sname = "";  nameStream >> sname;
975                                         sname = sname.substr(1);
976                                         
977                                         map<string, int>::iterator it = firstSeqNames.find(sname);
978                                         
979                                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
980                                                 unsigned long int pos = inQual.tellg(); 
981                                                 qfileFilePos.push_back(pos - input.length() - 1);       
982                                                 firstSeqNames.erase(it);
983                                         }
984                                 }
985                         }
986                         
987                         if (firstSeqNames.size() == 0) { break; }
988                 }
989                 inQual.close();
990                 
991                 
992                 if (firstSeqNames.size() != 0) { 
993                         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
994                                 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
995                         }
996                         qFileName = "";
997                         return processors;
998                 }
999
1000                 //get last file position of qfile
1001                 FILE * pFile;
1002                 unsigned long int size;
1003                 
1004                 //get num bytes in file
1005                 pFile = fopen (qfilename.c_str(),"rb");
1006                 if (pFile==NULL) perror ("Error opening file");
1007                 else{
1008                         fseek (pFile, 0, SEEK_END);
1009                         size=ftell (pFile);
1010                         fclose (pFile);
1011                 }
1012                 
1013                 qfileFilePos.push_back(size);
1014                 
1015                 return processors;
1016                 
1017                 #else
1018                 
1019                         fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1020                         //get last file position of fastafile
1021                         FILE * pFile;
1022                         unsigned long int size;
1023                         
1024                         //get num bytes in file
1025                         pFile = fopen (filename.c_str(),"rb");
1026                         if (pFile==NULL) perror ("Error opening file");
1027                         else{
1028                                 fseek (pFile, 0, SEEK_END);
1029                                 size=ftell (pFile);
1030                                 fclose (pFile);
1031                         }
1032                         fastaFilePos.push_back(size);
1033                         
1034                         //get last file position of fastafile
1035                         FILE * qFile;
1036                         
1037                         //get num bytes in file
1038                         qFile = fopen (qfilename.c_str(),"rb");
1039                         if (qFile==NULL) perror ("Error opening file");
1040                         else{
1041                                 fseek (qFile, 0, SEEK_END);
1042                                 size=ftell (qFile);
1043                                 fclose (qFile);
1044                         }
1045                         qfileFilePos.push_back(size);
1046                 
1047                         return 1;
1048                 
1049                 #endif
1050         }
1051         catch(exception& e) {
1052                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1053                 exit(1);
1054         }
1055 }
1056
1057 //***************************************************************************************************************
1058
1059 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1060         try {
1061                 ifstream inOligos;
1062                 m->openInputFile(oligoFile, inOligos);
1063                 
1064                 ofstream test;
1065                 
1066                 string type, oligo, group;
1067
1068                 int indexPrimer = 0;
1069                 int indexBarcode = 0;
1070                 
1071                 while(!inOligos.eof()){
1072
1073                         inOligos >> type; 
1074                                         
1075                         if(type[0] == '#'){
1076                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1077                                 m->gobble(inOligos);
1078                         }
1079                         else{
1080                                 m->gobble(inOligos);
1081                                 //make type case insensitive
1082                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1083                                 
1084                                 inOligos >> oligo;
1085                                 
1086                                 for(int i=0;i<oligo.length();i++){
1087                                         oligo[i] = toupper(oligo[i]);
1088                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1089                                 }
1090                                 
1091                                 if(type == "FORWARD"){
1092                                         group = "";
1093                                         
1094                                         // get rest of line in case there is a primer name
1095                                         while (!inOligos.eof()) {       
1096                                                 char c = inOligos.get(); 
1097                                                 if (c == 10 || c == 13){        break;  }
1098                                                 else if (c == 32 || c == 9){;} //space or tab
1099                                                 else {  group += c;  }
1100                                         } 
1101                                         
1102                                         //check for repeat barcodes
1103                                         map<string, int>::iterator itPrime = primers.find(oligo);
1104                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1105                                         
1106                                         primers[oligo]=indexPrimer; indexPrimer++;              
1107                                         primerNameVector.push_back(group);
1108                                 }
1109                                 else if(type == "REVERSE"){
1110                                         Sequence oligoRC("reverse", oligo);
1111                                         oligoRC.reverseComplement();
1112                                         revPrimer.push_back(oligoRC.getUnaligned());
1113                                 }
1114                                 else if(type == "BARCODE"){
1115                                         inOligos >> group;
1116                                         
1117                                         //check for repeat barcodes
1118                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1119                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1120                                                 
1121                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1122                                         barcodeNameVector.push_back(group);
1123                                 }
1124                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
1125                         }
1126                         m->gobble(inOligos);
1127                 }       
1128                 inOligos.close();
1129                 
1130                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1131                 
1132                 //add in potential combos
1133                 if(barcodeNameVector.size() == 0){
1134                         barcodes[""] = 0;
1135                         barcodeNameVector.push_back("");                        
1136                 }
1137                 
1138                 if(primerNameVector.size() == 0){
1139                         primers[""] = 0;
1140                         primerNameVector.push_back("");                 
1141                 }
1142                 
1143                 fastaFileNames.resize(barcodeNameVector.size());
1144                 for(int i=0;i<fastaFileNames.size();i++){
1145                         fastaFileNames[i].assign(primerNameVector.size(), "");
1146                 }
1147                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1148                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1149                 
1150                 if(allFiles){
1151                         set<string> uniqueNames; //used to cleanup outputFileNames
1152                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1153                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1154                                         
1155                                         string primerName = primerNameVector[itPrimer->second];
1156                                         string barcodeName = barcodeNameVector[itBar->second];
1157                                         
1158                                         string comboGroupName = "";
1159                                         string fastaFileName = "";
1160                                         string qualFileName = "";
1161                                         string nameFileName = "";
1162                                         
1163                                         if(primerName == ""){
1164                                                 comboGroupName = barcodeNameVector[itBar->second];
1165                                         }
1166                                         else{
1167                                                 if(barcodeName == ""){
1168                                                         comboGroupName = primerNameVector[itPrimer->second];
1169                                                 }
1170                                                 else{
1171                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1172                                                 }
1173                                         }
1174                                         
1175                                         
1176                                         ofstream temp;
1177                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1178                                         if (uniqueNames.count(fastaFileName) == 0) {
1179                                                 outputNames.push_back(fastaFileName);
1180                                                 outputTypes["fasta"].push_back(fastaFileName);
1181                                                 uniqueNames.insert(fastaFileName);
1182                                         }
1183                                         
1184                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1185                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1186                                         
1187                                         if(qFileName != ""){
1188                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1189                                                 if (uniqueNames.count(qualFileName) == 0) {
1190                                                         outputNames.push_back(qualFileName);
1191                                                         outputTypes["qfile"].push_back(qualFileName);
1192                                                 }
1193                                                 
1194                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1195                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1196                                         }
1197                                         
1198                                         if(nameFile != ""){
1199                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1200                                                 if (uniqueNames.count(nameFileName) == 0) {
1201                                                         outputNames.push_back(nameFileName);
1202                                                         outputTypes["name"].push_back(nameFileName);
1203                                                 }
1204                                                 
1205                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1206                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1207                                         }
1208                                         
1209                                 }
1210                         }
1211                 }
1212                 numFPrimers = primers.size();
1213                 numRPrimers = revPrimer.size();
1214                 
1215                 bool allBlank = true;
1216                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1217                         if (barcodeNameVector[i] != "") {
1218                                 allBlank = false;
1219                                 break;
1220                         }
1221                 }
1222                 for (int i = 0; i < primerNameVector.size(); i++) {
1223                         if (primerNameVector[i] != "") {
1224                                 allBlank = false;
1225                                 break;
1226                         }
1227                 }
1228                 
1229                 if (allBlank) {
1230                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1231                         allFiles = false;
1232                         return false;
1233                 }
1234                 
1235                 return true;
1236                 
1237         }
1238         catch(exception& e) {
1239                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1240                 exit(1);
1241         }
1242 }
1243
1244 //***************************************************************************************************************
1245
1246 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
1247         try {
1248                 
1249                 string rawSequence = seq.getUnaligned();
1250                 int success = bdiffs + 1;       //guilty until proven innocent
1251                 
1252                 //can you find the barcode
1253                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1254                         string oligo = it->first;
1255                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
1256                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1257                                 break;  
1258                         }
1259                         
1260                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1261                                 group = it->second;
1262                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1263                                 
1264                                 if(qual.getName() != ""){
1265                                         qual.trimQScores(oligo.length(), -1);
1266                                 }
1267                                 
1268                                 success = 0;
1269                                 break;
1270                         }
1271                 }
1272                 
1273                 //if you found the barcode or if you don't want to allow for diffs
1274                 if ((bdiffs == 0) || (success == 0)) { return success;  }
1275                 
1276                 else { //try aligning and see if you can find it
1277
1278                         int maxLength = 0;
1279
1280                         Alignment* alignment;
1281                         if (barcodes.size() > 0) {
1282                                 map<string,int>::iterator it=barcodes.begin();
1283
1284                                 for(it;it!=barcodes.end();it++){
1285                                         if(it->first.length() > maxLength){
1286                                                 maxLength = it->first.length();
1287                                         }
1288                                 }
1289                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
1290
1291                         }else{ alignment = NULL; } 
1292                         
1293                         //can you find the barcode
1294                         int minDiff = 1e6;
1295                         int minCount = 1;
1296                         int minGroup = -1;
1297                         int minPos = 0;
1298                         
1299                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1300                                 string oligo = it->first;
1301 //                              int length = oligo.length();
1302                                 
1303                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
1304                                         success = bdiffs + 10;
1305                                         break;
1306                                 }
1307                                 
1308                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1309                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1310                                 oligo = alignment->getSeqAAln();
1311                                 string temp = alignment->getSeqBAln();
1312                 
1313                                 int alnLength = oligo.length();
1314                                 
1315                                 for(int i=oligo.length()-1;i>=0;i--){
1316                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1317                                 }
1318                                 oligo = oligo.substr(0,alnLength);
1319                                 temp = temp.substr(0,alnLength);
1320                                 
1321                                 int numDiff = countDiffs(oligo, temp);
1322                                 
1323                                 if(numDiff < minDiff){
1324                                         minDiff = numDiff;
1325                                         minCount = 1;
1326                                         minGroup = it->second;
1327                                         minPos = 0;
1328                                         for(int i=0;i<alnLength;i++){
1329                                                 if(temp[i] != '-'){
1330                                                         minPos++;
1331                                                 }
1332                                         }
1333                                 }
1334                                 else if(numDiff == minDiff){
1335                                         minCount++;
1336                                 }
1337
1338                         }
1339
1340                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
1341                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1342                         else{                                                                                                   //use the best match
1343                                 group = minGroup;
1344                                 seq.setUnaligned(rawSequence.substr(minPos));
1345                                 
1346                                 if(qual.getName() != ""){
1347                                         qual.trimQScores(minPos, -1);
1348                                 }
1349                                 success = minDiff;
1350                         }
1351                         
1352                         if (alignment != NULL) {  delete alignment;  }
1353                         
1354                 }
1355         
1356                 return success;
1357                 
1358         }
1359         catch(exception& e) {
1360                 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
1361                 exit(1);
1362         }
1363
1364 }
1365
1366 //***************************************************************************************************************
1367
1368 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
1369         try {
1370                 string rawSequence = seq.getUnaligned();
1371                 int success = pdiffs + 1;       //guilty until proven innocent
1372                 
1373                 //can you find the primer
1374                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1375                         string oligo = it->first;
1376                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
1377                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1378                                 break;  
1379                         }
1380                         
1381                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1382                                 group = it->second;
1383                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1384                                 if(qual.getName() != ""){
1385                                         qual.trimQScores(oligo.length(), -1);
1386                                 }
1387                                 success = 0;
1388                                 break;
1389                         }
1390                 }
1391
1392                 //if you found the barcode or if you don't want to allow for diffs
1393                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1394                 
1395                 else { //try aligning and see if you can find it
1396
1397                         int maxLength = 0;
1398
1399                         Alignment* alignment;
1400                         if (primers.size() > 0) {
1401                                 map<string,int>::iterator it=primers.begin();
1402
1403                                 for(it;it!=primers.end();it++){
1404                                         if(it->first.length() > maxLength){
1405                                                 maxLength = it->first.length();
1406                                         }
1407                                 }
1408                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
1409
1410                         }else{ alignment = NULL; } 
1411                         
1412                         //can you find the barcode
1413                         int minDiff = 1e6;
1414                         int minCount = 1;
1415                         int minGroup = -1;
1416                         int minPos = 0;
1417                         
1418                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1419                                 string oligo = it->first;
1420 //                              int length = oligo.length();
1421                                 
1422                                 if(rawSequence.length() < maxLength){   
1423                                         success = pdiffs + 100;
1424                                         break;
1425                                 }
1426                                 
1427                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1428                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1429                                 oligo = alignment->getSeqAAln();
1430                                 string temp = alignment->getSeqBAln();
1431                 
1432                                 int alnLength = oligo.length();
1433                                 
1434                                 for(int i=oligo.length()-1;i>=0;i--){
1435                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1436                                 }
1437                                 oligo = oligo.substr(0,alnLength);
1438                                 temp = temp.substr(0,alnLength);
1439                                 
1440                                 int numDiff = countDiffs(oligo, temp);
1441                                 
1442                                 if(numDiff < minDiff){
1443                                         minDiff = numDiff;
1444                                         minCount = 1;
1445                                         minGroup = it->second;
1446                                         minPos = 0;
1447                                         for(int i=0;i<alnLength;i++){
1448                                                 if(temp[i] != '-'){
1449                                                         minPos++;
1450                                                 }
1451                                         }
1452                                 }
1453                                 else if(numDiff == minDiff){
1454                                         minCount++;
1455                                 }
1456
1457                         }
1458
1459                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1460                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1461                         else{                                                                                                   //use the best match
1462                                 group = minGroup;
1463                                 seq.setUnaligned(rawSequence.substr(minPos));
1464                                 if(qual.getName() != ""){
1465                                         qual.trimQScores(minPos, -1);
1466                                 }
1467                                 success = minDiff;
1468                         }
1469                         
1470                         if (alignment != NULL) {  delete alignment;  }
1471                         
1472                 }
1473                 
1474                 return success;
1475
1476         }
1477         catch(exception& e) {
1478                 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1479                 exit(1);
1480         }
1481 }
1482
1483 //***************************************************************************************************************
1484
1485 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1486         try {
1487                 string rawSequence = seq.getUnaligned();
1488                 bool success = 0;       //guilty until proven innocent
1489                 
1490                 for(int i=0;i<numRPrimers;i++){
1491                         string oligo = revPrimer[i];
1492                         
1493                         if(rawSequence.length() < oligo.length()){
1494                                 success = 0;
1495                                 break;
1496                         }
1497                         
1498                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1499                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1500                                 if(qual.getName() != ""){
1501                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
1502                                 }
1503                                 success = 1;
1504                                 break;
1505                         }
1506                 }       
1507                 return success;
1508                 
1509         }
1510         catch(exception& e) {
1511                 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1512                 exit(1);
1513         }
1514 }
1515
1516 //***************************************************************************************************************
1517
1518 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1519         try {
1520                 bool success = 1;
1521                 if(qscores.getName() != ""){
1522                         qscores.trimQScores(-1, keepFirst);
1523                 }
1524                 sequence.trim(keepFirst);
1525                 return success;
1526         }
1527         catch(exception& e) {
1528                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1529                 exit(1);
1530         }
1531         
1532 }       
1533
1534 //***************************************************************************************************************
1535
1536 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1537         try {
1538                 bool success = 0;
1539                 
1540                 int length = sequence.getNumBases() - removeLast;
1541                 
1542                 if(length > 0){
1543                         if(qscores.getName() != ""){
1544                                 qscores.trimQScores(-1, length);
1545                         }
1546                         sequence.trim(length);
1547                         success = 1;
1548                 }
1549                 else{
1550                         success = 0;
1551                 }
1552
1553                 return success;
1554         }
1555         catch(exception& e) {
1556                 m->errorOut(e, "removeLastTrim", "countDiffs");
1557                 exit(1);
1558         }
1559         
1560 }       
1561
1562 //***************************************************************************************************************
1563
1564 bool TrimSeqsCommand::cullLength(Sequence& seq){
1565         try {
1566         
1567                 int length = seq.getNumBases();
1568                 bool success = 0;       //guilty until proven innocent
1569                 
1570                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1571                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1572                 else                                                                                            {       success = 0;    }
1573                 
1574                 return success;
1575         
1576         }
1577         catch(exception& e) {
1578                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1579                 exit(1);
1580         }
1581         
1582 }
1583
1584 //***************************************************************************************************************
1585
1586 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1587         try {
1588                 int longHomoP = seq.getLongHomoPolymer();
1589                 bool success = 0;       //guilty until proven innocent
1590                 
1591                 if(longHomoP <= maxHomoP){      success = 1;    }
1592                 else                                    {       success = 0;    }
1593                 
1594                 return success;
1595         }
1596         catch(exception& e) {
1597                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1598                 exit(1);
1599         }
1600         
1601 }
1602
1603 //***************************************************************************************************************
1604
1605 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1606         try {
1607                 int numNs = seq.getAmbigBases();
1608                 bool success = 0;       //guilty until proven innocent
1609                 
1610                 if(numNs <= maxAmbig)   {       success = 1;    }
1611                 else                                    {       success = 0;    }
1612                 
1613                 return success;
1614         }
1615         catch(exception& e) {
1616                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1617                 exit(1);
1618         }
1619         
1620 }
1621
1622 //***************************************************************************************************************
1623
1624 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1625         try {
1626                 bool success = 1;
1627                 int length = oligo.length();
1628                 
1629                 for(int i=0;i<length;i++){
1630                         
1631                         if(oligo[i] != seq[i]){
1632                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
1633                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
1634                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
1635                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
1636                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
1637                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
1638                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
1639                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
1640                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1641                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1642                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
1643                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
1644                                 
1645                                 if(success == 0)        {       break;   }
1646                         }
1647                         else{
1648                                 success = 1;
1649                         }
1650                 }
1651                 
1652                 return success;
1653         }
1654         catch(exception& e) {
1655                 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1656                 exit(1);
1657         }
1658
1659 }
1660
1661 //***************************************************************************************************************
1662
1663 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1664         try {
1665
1666                 int length = oligo.length();
1667                 int countDiffs = 0;
1668                 
1669                 for(int i=0;i<length;i++){
1670                                                                 
1671                         if(oligo[i] != seq[i]){
1672                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
1673                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
1674                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
1675                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
1676                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
1677                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
1678                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
1679                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
1680                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1681                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1682                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
1683                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
1684                         }
1685                         
1686                 }
1687                 
1688                 return countDiffs;
1689         }
1690         catch(exception& e) {
1691                 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1692                 exit(1);
1693         }
1694
1695 }
1696
1697 //***************************************************************************************************************