]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
fixed bug with trim.seqs allfiles=t
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12
13 //**********************************************************************************************************************
14 vector<string> TrimSeqsCommand::setParameters(){        
15         try {
16                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
17                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
18                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
19                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
20                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
21                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
22                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
23                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
24                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
25                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
26                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
27                 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
28                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
29                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
30                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
31                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
32                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
33                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
34                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
35                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
36                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
37                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
38                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
39                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
40                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
41                         
42                 vector<string> myArray;
43                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
44                 return myArray;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 string TrimSeqsCommand::getHelpString(){        
53         try {
54                 string helpString = "";
55                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
56                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
57                 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
58                 helpString += "The fasta parameter is required.\n";
59                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
60                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
61                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
62                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
63                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
64                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
65                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
66                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n";
67                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
68                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
69                 helpString += "The qfile parameter allows you to provide a quality file.\n";
70                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
71                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
72                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
73                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
74                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
75                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
76                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
77                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
78                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
79                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
80                 helpString += "The trim.seqs command should be in the following format: \n";
81                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
82                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
83                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
84                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
85                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
86                 return helpString;
87         }
88         catch(exception& e) {
89                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
90                 exit(1);
91         }
92 }
93
94
95 //**********************************************************************************************************************
96
97 TrimSeqsCommand::TrimSeqsCommand(){     
98         try {
99                 abort = true; calledHelp = true; 
100                 setParameters();
101                 vector<string> tempOutNames;
102                 outputTypes["fasta"] = tempOutNames;
103                 outputTypes["qfile"] = tempOutNames;
104                 outputTypes["group"] = tempOutNames;
105                 outputTypes["name"] = tempOutNames;
106         }
107         catch(exception& e) {
108                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
109                 exit(1);
110         }
111 }
112 //***************************************************************************************************************
113
114 TrimSeqsCommand::TrimSeqsCommand(string option)  {
115         try {
116                 
117                 abort = false; calledHelp = false;   
118                 comboStarts = 0;
119                 
120                 //allow user to run help
121                 if(option == "help") { help(); abort = true; calledHelp = true; }
122                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
123                 
124                 else {
125                         vector<string> myArray = setParameters();
126                         
127                         OptionParser parser(option);
128                         map<string,string> parameters = parser.getParameters();
129                         
130                         ValidParameters validParameter;
131                         map<string,string>::iterator it;
132                         
133                         //check to make sure all parameters are valid for command
134                         for (it = parameters.begin(); it != parameters.end(); it++) { 
135                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
136                         }
137                         
138                         //initialize outputTypes
139                         vector<string> tempOutNames;
140                         outputTypes["fasta"] = tempOutNames;
141                         outputTypes["qfile"] = tempOutNames;
142                         outputTypes["group"] = tempOutNames;
143                         outputTypes["name"] = tempOutNames;
144                         
145                         //if the user changes the input directory command factory will send this info to us in the output parameter 
146                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
147                         if (inputDir == "not found"){   inputDir = "";          }
148                         else {
149                                 string path;
150                                 it = parameters.find("fasta");
151                                 //user has given a template file
152                                 if(it != parameters.end()){ 
153                                         path = m->hasPath(it->second);
154                                         //if the user has not given a path then, add inputdir. else leave path alone.
155                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
156                                 }
157                                 
158                                 it = parameters.find("oligos");
159                                 //user has given a template file
160                                 if(it != parameters.end()){ 
161                                         path = m->hasPath(it->second);
162                                         //if the user has not given a path then, add inputdir. else leave path alone.
163                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
164                                 }
165                                 
166                                 it = parameters.find("qfile");
167                                 //user has given a template file
168                                 if(it != parameters.end()){ 
169                                         path = m->hasPath(it->second);
170                                         //if the user has not given a path then, add inputdir. else leave path alone.
171                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
172                                 }
173                                 
174                                 it = parameters.find("name");
175                                 //user has given a template file
176                                 if(it != parameters.end()){ 
177                                         path = m->hasPath(it->second);
178                                         //if the user has not given a path then, add inputdir. else leave path alone.
179                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
180                                 }
181                                 
182                         }
183
184                         
185                         //check for required parameters
186                         fastaFile = validParameter.validFile(parameters, "fasta", true);
187                         if (fastaFile == "not found") {                                 
188                                 fastaFile = m->getFastaFile(); 
189                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
190                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
191                         }else if (fastaFile == "not open") { abort = true; }    
192                         
193                         //if the user changes the output directory command factory will send this info to us in the output parameter 
194                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
195                                 outputDir = ""; 
196                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
197                         }
198                 
199                         
200                         //check for optional parameter and set defaults
201                         // ...at some point should added some additional type checking...
202                         string temp;
203                         temp = validParameter.validFile(parameters, "flip", false);
204                         if (temp == "not found"){       flip = 0;       }
205                         else if(m->isTrue(temp))        {       flip = 1;       }
206                 
207                         temp = validParameter.validFile(parameters, "oligos", true);
208                         if (temp == "not found"){       oligoFile = "";         }
209                         else if(temp == "not open"){    abort = true;   } 
210                         else                                    {       oligoFile = temp;               }
211                         
212                         
213                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
214                         convert(temp, maxAmbig);  
215
216                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
217                         convert(temp, maxHomoP);  
218
219                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
220                         convert(temp, minLength); 
221                         
222                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
223                         convert(temp, maxLength);
224                         
225                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
226                         convert(temp, bdiffs);
227                         
228                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
229                         convert(temp, pdiffs);
230                         
231                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
232                         convert(temp, tdiffs);
233                         
234                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
235                         
236                         temp = validParameter.validFile(parameters, "qfile", true);     
237                         if (temp == "not found")        {       qFileName = "";         }
238                         else if(temp == "not open")     {       abort = true;           }
239                         else                                            {       qFileName = temp;       }
240                         
241                         temp = validParameter.validFile(parameters, "name", true);      
242                         if (temp == "not found")        {       nameFile = "";          }
243                         else if(temp == "not open")     {       abort = true;           }
244                         else                                            {       nameFile = temp;        }
245                         
246                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
247                         convert(temp, qThreshold);
248                         
249                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
250                         qtrim = m->isTrue(temp);
251
252                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
253                         convert(temp, qRollAverage);
254
255                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
256                         convert(temp, qWindowAverage);
257
258                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
259                         convert(temp, qWindowSize);
260
261                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
262                         convert(temp, qWindowStep);
263
264                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
265                         convert(temp, qAverage);
266
267                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
268                         convert(temp, keepFirst);
269
270                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
271                         convert(temp, removeLast);
272                         
273                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
274                         allFiles = m->isTrue(temp);
275                         
276                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
277                         m->setProcessors(temp);
278                         convert(temp, processors); 
279                         
280                         
281                         if(allFiles && (oligoFile == "")){
282                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
283                         }
284                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
285                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
286                                 qAverage=0;
287                                 qThreshold=0;
288                         }
289                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
290                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
291                                 abort = true;
292                         }
293                 }
294
295         }
296         catch(exception& e) {
297                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
298                 exit(1);
299         }
300 }
301 //***************************************************************************************************************
302
303 int TrimSeqsCommand::execute(){
304         try{
305         
306                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
307                 
308                 numFPrimers = 0;  //this needs to be initialized
309                 numRPrimers = 0;
310                 vector<vector<string> > fastaFileNames;
311                 vector<vector<string> > qualFileNames;
312                 vector<vector<string> > nameFileNames;
313                 
314                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
315                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
316                 
317                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
318                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
319                 
320                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
321                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
322                 
323                 if (qFileName != "") {
324                         outputNames.push_back(trimQualFile);
325                         outputNames.push_back(scrapQualFile);
326                         outputTypes["qfile"].push_back(trimQualFile);
327                         outputTypes["qfile"].push_back(scrapQualFile); 
328                 }
329                 
330                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
331                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
332                 
333                 if (nameFile != "") {
334                         m->readNames(nameFile, nameMap);
335                         outputNames.push_back(trimNameFile);
336                         outputNames.push_back(scrapNameFile);
337                         outputTypes["name"].push_back(trimNameFile);
338                         outputTypes["name"].push_back(scrapNameFile); 
339                 }
340                 
341                 if (m->control_pressed) { return 0; }
342                 
343                 string outputGroupFileName;
344                 if(oligoFile != ""){
345                         outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
346                         outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
347                         getOligos(fastaFileNames, qualFileNames, nameFileNames);
348                 }
349                 
350                 vector<unsigned long int> fastaFilePos;
351                 vector<unsigned long int> qFilePos;
352                 
353                 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
354                 
355                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
356                         lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
357                         if (qFileName != "") {  qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)]));  }
358                 }       
359                 if(qFileName == "")     {       qLines = lines; } //files with duds
360                 
361                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
362                                 if(processors == 1){
363                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
364                                 }else{
365                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
366                                 }       
367                 #else
368                                 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
369                 #endif
370                 
371                 if (m->control_pressed) {  return 0; }                  
372         
373                 if(allFiles){
374                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
375                         map<string, string>::iterator it;
376                         set<string> namesToRemove;
377                         for(int i=0;i<fastaFileNames.size();i++){
378                                 for(int j=0;j<fastaFileNames[0].size();j++){
379                                         if (fastaFileNames[i][j] != "") {
380                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
381                                                         if(m->isBlank(fastaFileNames[i][j])){
382                                                                 remove(fastaFileNames[i][j].c_str());
383                                                                 namesToRemove.insert(fastaFileNames[i][j]);
384                                                         
385                                                                 if(qFileName != ""){
386                                                                         remove(qualFileNames[i][j].c_str());
387                                                                         namesToRemove.insert(qualFileNames[i][j]);
388                                                                 }
389                                                                 
390                                                                 if(nameFile != ""){
391                                                                         remove(nameFileNames[i][j].c_str());
392                                                                         namesToRemove.insert(nameFileNames[i][j]);
393                                                                 }
394                                                         }else{  
395                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
396                                                                 if (it == uniqueFastaNames.end()) {     
397                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
398                                                                 }       
399                                                         }
400                                                 }
401                                         }
402                                 }
403                         }
404                         
405                         //remove names for outputFileNames, just cleans up the output
406                         vector<string> outputNames2;
407                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
408                         outputNames = outputNames2;
409                         
410                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
411                                 ifstream in;
412                                 m->openInputFile(it->first, in);
413                                 
414                                 ofstream out;
415                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
416                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
417                                 m->openOutputFile(thisGroupName, out);
418                                 
419                                 while (!in.eof()){
420                                         if (m->control_pressed) { break; }
421                                         
422                                         Sequence currSeq(in); m->gobble(in);
423                                         out << currSeq.getName() << '\t' << it->second << endl;
424                                 }
425                                 in.close();
426                                 out.close();
427                         }
428                 }
429                 
430                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0;     }
431
432                 //output group counts
433                 m->mothurOutEndLine();
434                 int total = 0;
435                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
436                          total += it->second; m->mothurOut("Group " + it->first + " contains " + toString(it->second) + " sequences."); m->mothurOutEndLine(); 
437                 }
438                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
439                 
440                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0;     }
441
442                 //set fasta file as new current fastafile
443                 string current = "";
444                 itTypes = outputTypes.find("fasta");
445                 if (itTypes != outputTypes.end()) {
446                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
447                 }
448                 
449                 itTypes = outputTypes.find("name");
450                 if (itTypes != outputTypes.end()) {
451                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
452                 }
453                 
454                 itTypes = outputTypes.find("qfile");
455                 if (itTypes != outputTypes.end()) {
456                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
457                 }
458                 
459                 itTypes = outputTypes.find("group");
460                 if (itTypes != outputTypes.end()) {
461                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
462                 }
463
464                 m->mothurOutEndLine();
465                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
466                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
467                 m->mothurOutEndLine();
468                 
469                 return 0;       
470                         
471         }
472         catch(exception& e) {
473                 m->errorOut(e, "TrimSeqsCommand", "execute");
474                 exit(1);
475         }
476 }
477                 
478 /**************************************************************************************/
479
480 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair* line, linePair* qline) {    
481                 
482         try {
483                 
484                 ofstream trimFASTAFile;
485                 m->openOutputFile(trimFileName, trimFASTAFile);
486                 
487                 ofstream scrapFASTAFile;
488                 m->openOutputFile(scrapFileName, scrapFASTAFile);
489                 
490                 ofstream trimQualFile;
491                 ofstream scrapQualFile;
492                 if(qFileName != ""){
493                         m->openOutputFile(trimQFileName, trimQualFile);
494                         m->openOutputFile(scrapQFileName, scrapQualFile);
495                 }
496                 
497                 ofstream trimNameFile;
498                 ofstream scrapNameFile;
499                 if(nameFile != ""){
500                         m->openOutputFile(trimNFileName, trimNameFile);
501                         m->openOutputFile(scrapNFileName, scrapNameFile);
502                 }
503                 
504                 
505                 ofstream outGroupsFile;
506                 if (oligoFile != ""){   m->openOutputFile(groupFileName, outGroupsFile);   }
507                 if(allFiles){
508                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
509                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
510                                         if (fastaFileNames[i][j] != "") {
511                                                 ofstream temp;
512                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
513                                                 if(qFileName != ""){
514                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
515                                                 }
516                                                 
517                                                 if(nameFile != ""){
518                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
519                                                 }
520                                         }
521                                 }
522                         }
523                 }
524                 
525                 ifstream inFASTA;
526                 m->openInputFile(filename, inFASTA);
527                 inFASTA.seekg(line->start);
528                 
529                 ifstream qFile;
530                 if(qFileName != "")     {
531                         m->openInputFile(qFileName, qFile);
532                         qFile.seekg(qline->start);  
533                 }
534                 
535                 int count = 0;
536                 bool moreSeqs = 1;
537         
538                 while (moreSeqs) {
539                                 
540                         if (m->control_pressed) { 
541                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
542                                 if (oligoFile != "") {   outGroupsFile.close();   }
543
544                                 if(qFileName != ""){
545                                         qFile.close();
546                                 }
547                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
548
549                                 return 0;
550                         }
551                         
552                         int success = 1;
553                         string trashCode = "";
554                         int currentSeqsDiffs = 0;
555
556                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
557                         
558                         QualityScores currQual;
559                         if(qFileName != ""){
560                                 currQual = QualityScores(qFile);  m->gobble(qFile);
561                         }
562
563                         string origSeq = currSeq.getUnaligned();
564                         if (origSeq != "") {
565                                 
566                                 int barcodeIndex = 0;
567                                 int primerIndex = 0;
568                                 
569                                 if(barcodes.size() != 0){
570                                         success = stripBarcode(currSeq, currQual, barcodeIndex);
571                                         if(success > bdiffs)            {       trashCode += 'b';       }
572                                         else{ currentSeqsDiffs += success;  }
573                                 }
574                                 
575                                 if(numFPrimers != 0){
576                                         success = stripForward(currSeq, currQual, primerIndex);
577                                         if(success > pdiffs)            {       trashCode += 'f';       }
578                                         else{ currentSeqsDiffs += success;  }
579                                 }
580                                 
581                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
582                                 
583                                 if(numRPrimers != 0){
584                                         success = stripReverse(currSeq, currQual);
585                                         if(!success)                            {       trashCode += 'r';       }
586                                 }
587
588                                 if(keepFirst != 0){
589                                         success = keepFirstTrim(currSeq, currQual);
590                                 }
591                                 
592                                 if(removeLast != 0){
593                                         success = removeLastTrim(currSeq, currQual);
594                                         if(!success)                            {       trashCode += 'l';       }
595                                 }
596
597                                 
598                                 if(qFileName != ""){
599                                         int origLength = currSeq.getNumBases();
600                                         
601                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
602                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
603                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
604                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
605                                         else                                            {       success = 1;                            }
606                                 
607                                         //you don't want to trim, if it fails above then scrap it
608                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
609                                         
610                                         if(!success)                            {       trashCode += 'q';       }
611                                 }                               
612                 
613                                 if(minLength > 0 || maxLength > 0){
614                                         success = cullLength(currSeq);
615                                         if(!success)                            {       trashCode += 'l';       }
616                                 }
617                                 if(maxHomoP > 0){
618                                         success = cullHomoP(currSeq);
619                                         if(!success)                            {       trashCode += 'h';       }
620                                 }
621                                 if(maxAmbig != -1){
622                                         success = cullAmbigs(currSeq);
623                                         if(!success)                            {       trashCode += 'n';       }
624                                 }
625                                 
626                                 if(flip){               // should go last                       
627                                         currSeq.reverseComplement();
628                                         if(qFileName != ""){
629                                                 currQual.flipQScores(); 
630                                         }
631                                 }
632                                 
633                                 if(trashCode.length() == 0){
634                                         currSeq.setAligned(currSeq.getUnaligned());
635                                         currSeq.printSequence(trimFASTAFile);
636                                         
637                                         if(qFileName != ""){
638                                                 currQual.printQScores(trimQualFile);
639                                         }
640                                         
641                                         if(nameFile != ""){
642                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
643                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
644                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
645                                         }
646                                         
647                                         if(barcodes.size() != 0){
648                                                 string thisGroup = barcodeNameVector[barcodeIndex];
649                                                 if (primers.size() != 0) { if (primerNameVector[primerIndex] != "") { thisGroup += "." + primerNameVector[primerIndex]; } }
650                                                 
651                                                 outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
652                                                 
653                                                 map<string, int>::iterator it = groupCounts.find(thisGroup);
654                                                 if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
655                                                 else { groupCounts[it->first]++; }
656                                                         
657                                         }
658                                         
659                                         
660                                         if(allFiles){
661                                                 ofstream output;
662                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
663                                                 currSeq.printSequence(output);
664                                                 output.close();
665                                                 
666                                                 if(qFileName != ""){
667                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
668                                                         currQual.printQScores(output);
669                                                         output.close();                                                 
670                                                 }
671                                                 
672                                                 if(nameFile != ""){
673                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
674                                                         if (itName != nameMap.end()) { 
675                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
676                                                                 output << itName->first << '\t' << itName->second << endl; 
677                                                                 output.close();
678                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
679                                                 }
680                                         }
681                                 }
682                                 else{
683                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
684                                         currSeq.setUnaligned(origSeq);
685                                         currSeq.setAligned(origSeq);
686                                         currSeq.printSequence(scrapFASTAFile);
687                                         if(qFileName != ""){
688                                                 currQual.printQScores(scrapQualFile);
689                                         }
690                                         if(nameFile != ""){
691                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
692                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
693                                                         else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
694                                         }
695                                 }
696                                 count++;
697                         }
698                         
699                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
700                                 unsigned long int pos = inFASTA.tellg();
701                                 if ((pos == -1) || (pos >= line->end)) { break; }
702                         #else
703                                 if (inFASTA.eof()) { break; }
704                         #endif
705                                 
706                         //report progress
707                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
708                         
709                 }
710                 //report progress
711                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
712
713                 
714                 inFASTA.close();
715                 trimFASTAFile.close();
716                 scrapFASTAFile.close();
717                 if (oligoFile != "") {   outGroupsFile.close();   }
718                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
719                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
720                 
721                 return count;
722         }
723         catch(exception& e) {
724                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
725                 exit(1);
726         }
727 }
728
729 /**************************************************************************************************/
730
731 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
732         try {
733 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
734                 int process = 1;
735                 int exitCommand = 1;
736                 processIDS.clear();
737                 
738                 //loop through and create all the processes you want
739                 while (process != processors) {
740                         int pid = fork();
741                         
742                         if (pid > 0) {
743                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
744                                 process++;
745                         }else if (pid == 0){
746                                 
747                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
748                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
749                                 vector<vector<string> > tempNameFileNames = nameFileNames;
750
751                                 if(allFiles){
752                                         ofstream temp;
753
754                                         for(int i=0;i<tempFASTAFileNames.size();i++){
755                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
756                                                         if (tempFASTAFileNames[i][j] != "") {
757                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
758                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
759
760                                                                 if(qFileName != ""){
761                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
762                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
763                                                                 }
764                                                                 if(nameFile != ""){
765                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
766                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
767                                                                 }
768                                                         }
769                                                 }
770                                         }
771                                 }
772                                                         
773                                 driverCreateTrim(filename,
774                                                                  qFileName,
775                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
776                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
777                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
778                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
779                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
780                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
781                                                                  (groupFile + toString(getpid()) + ".temp"),
782                                                                  tempFASTAFileNames,
783                                                                  tempPrimerQualFileNames,
784                                                                  tempNameFileNames,
785                                                                  lines[process],
786                                                                  qLines[process]);
787                                 
788                                 //pass groupCounts to parent
789                                 ofstream out;
790                                 string tempFile = filename + toString(getpid()) + ".num.temp";
791                                 m->openOutputFile(tempFile, out);
792                                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
793                                         out << it->first << '\t' << it->second << endl;
794                                 }
795                                 out.close();
796                                 
797                                 exit(0);
798                         }else { 
799                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
800                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
801                                 exit(0);
802                         }
803                 }
804                 
805                 //parent do my part
806                 ofstream temp;
807                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
808                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
809                 m->openOutputFile(trimQualFileName, temp);              temp.close();
810                 m->openOutputFile(scrapQualFileName, temp);             temp.close();
811                 m->openOutputFile(trimNameFileName, temp);              temp.close();
812                 m->openOutputFile(scrapNameFileName, temp);             temp.close();
813
814                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
815                 
816                 //force parent to wait until all the processes are done
817                 for (int i=0;i<processIDS.size();i++) { 
818                         int temp = processIDS[i];
819                         wait(&temp);
820                 }
821                 
822                 //append files
823                 for(int i=0;i<processIDS.size();i++){
824                         
825                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
826                         
827                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
828                         remove((trimFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
829                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
830                         remove((scrapFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
831                         
832                         if(qFileName != ""){
833                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
834                                 remove((trimQualFileName + toString(processIDS[i]) + ".temp").c_str());
835                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
836                                 remove((scrapQualFileName + toString(processIDS[i]) + ".temp").c_str());
837                         }
838                         
839                         if(nameFile != ""){
840                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
841                                 remove((trimNameFileName + toString(processIDS[i]) + ".temp").c_str());
842                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
843                                 remove((scrapNameFileName + toString(processIDS[i]) + ".temp").c_str());
844                         }
845                         
846                         m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
847                         remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
848                         
849                         
850                         if(allFiles){
851                                 for(int j=0;j<fastaFileNames.size();j++){
852                                         for(int k=0;k<fastaFileNames[j].size();k++){
853                                                 if (fastaFileNames[j][k] != "") {
854                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
855                                                         remove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
856                                                         
857                                                         if(qFileName != ""){
858                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
859                                                                 remove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
860                                                         }
861                                                         
862                                                         if(nameFile != ""){
863                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
864                                                                 remove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
865                                                         }
866                                                 }
867                                         }
868                                 }
869                         }
870                         
871                         ifstream in;
872                         string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
873                         m->openInputFile(tempFile, in);
874                         int tempNum;
875                         string group;
876                         while (!in.eof()) { 
877                                 in >> group >> tempNum; m->gobble(in);
878                                 
879                                 map<string, int>::iterator it = groupCounts.find(group);
880                                 if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
881                                 else { groupCounts[it->first] += tempNum; }
882                         }
883                         in.close(); remove(tempFile.c_str());
884                         
885                 }
886         
887                 return exitCommand;
888 #endif          
889         }
890         catch(exception& e) {
891                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
892                 exit(1);
893         }
894 }
895
896 /**************************************************************************************************/
897
898 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
899         try {
900                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
901                 //set file positions for fasta file
902                 fastaFilePos = m->divideFile(filename, processors);
903                 
904                 if (qfilename == "") { return processors; }
905                 
906                 //get name of first sequence in each chunk
907                 map<string, int> firstSeqNames;
908                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
909                         ifstream in;
910                         m->openInputFile(filename, in);
911                         in.seekg(fastaFilePos[i]);
912                 
913                         Sequence temp(in); 
914                         firstSeqNames[temp.getName()] = i;
915                 
916                         in.close();
917                 }
918                                 
919                 //seach for filePos of each first name in the qfile and save in qfileFilePos
920                 ifstream inQual;
921                 m->openInputFile(qfilename, inQual);
922                 
923                 string input;
924                 while(!inQual.eof()){   
925                         input = m->getline(inQual);
926
927                         if (input.length() != 0) {
928                                 if(input[0] == '>'){ //this is a sequence name line
929                                         istringstream nameStream(input);
930                                         
931                                         string sname = "";  nameStream >> sname;
932                                         sname = sname.substr(1);
933                                         
934                                         map<string, int>::iterator it = firstSeqNames.find(sname);
935                                         
936                                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
937                                                 unsigned long int pos = inQual.tellg(); 
938                                                 qfileFilePos.push_back(pos - input.length() - 1);       
939                                                 firstSeqNames.erase(it);
940                                         }
941                                 }
942                         }
943                         
944                         if (firstSeqNames.size() == 0) { break; }
945                 }
946                 inQual.close();
947                 
948                 
949                 if (firstSeqNames.size() != 0) { 
950                         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
951                                 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
952                         }
953                         qFileName = "";
954                         return processors;
955                 }
956
957                 //get last file position of qfile
958                 FILE * pFile;
959                 unsigned long int size;
960                 
961                 //get num bytes in file
962                 pFile = fopen (qfilename.c_str(),"rb");
963                 if (pFile==NULL) perror ("Error opening file");
964                 else{
965                         fseek (pFile, 0, SEEK_END);
966                         size=ftell (pFile);
967                         fclose (pFile);
968                 }
969                 
970                 qfileFilePos.push_back(size);
971                 
972                 return processors;
973                 
974                 #else
975                 
976                         fastaFilePos.push_back(0); qfileFilePos.push_back(0);
977                         //get last file position of fastafile
978                         FILE * pFile;
979                         unsigned long int size;
980                         
981                         //get num bytes in file
982                         pFile = fopen (filename.c_str(),"rb");
983                         if (pFile==NULL) perror ("Error opening file");
984                         else{
985                                 fseek (pFile, 0, SEEK_END);
986                                 size=ftell (pFile);
987                                 fclose (pFile);
988                         }
989                         fastaFilePos.push_back(size);
990                         
991                         //get last file position of fastafile
992                         FILE * qFile;
993                         
994                         //get num bytes in file
995                         qFile = fopen (qfilename.c_str(),"rb");
996                         if (qFile==NULL) perror ("Error opening file");
997                         else{
998                                 fseek (qFile, 0, SEEK_END);
999                                 size=ftell (qFile);
1000                                 fclose (qFile);
1001                         }
1002                         qfileFilePos.push_back(size);
1003                 
1004                         return 1;
1005                 
1006                 #endif
1007         }
1008         catch(exception& e) {
1009                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1010                 exit(1);
1011         }
1012 }
1013
1014 //***************************************************************************************************************
1015
1016 void TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1017         try {
1018                 ifstream inOligos;
1019                 m->openInputFile(oligoFile, inOligos);
1020                 
1021                 ofstream test;
1022                 
1023                 string type, oligo, group;
1024
1025                 int indexPrimer = 0;
1026                 int indexBarcode = 0;
1027                 
1028                 while(!inOligos.eof()){
1029
1030                         inOligos >> type; 
1031                                         
1032                         if(type[0] == '#'){
1033                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1034                                 m->gobble(inOligos);
1035                         }
1036                         else{
1037                                 m->gobble(inOligos);
1038                                 //make type case insensitive
1039                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1040                                 
1041                                 inOligos >> oligo;
1042                                 
1043                                 for(int i=0;i<oligo.length();i++){
1044                                         oligo[i] = toupper(oligo[i]);
1045                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1046                                 }
1047                                 
1048                                 if(type == "FORWARD"){
1049                                         group = "";
1050                                         
1051                                         // get rest of line in case there is a primer name
1052                                         while (!inOligos.eof()) {       
1053                                                 char c = inOligos.get(); 
1054                                                 if (c == 10 || c == 13){        break;  }
1055                                                 else if (c == 32 || c == 9){;} //space or tab
1056                                                 else {  group += c;  }
1057                                         } 
1058                                         
1059                                         //check for repeat barcodes
1060                                         map<string, int>::iterator itPrime = primers.find(oligo);
1061                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1062                                         
1063                                         primers[oligo]=indexPrimer; indexPrimer++;              
1064                                         primerNameVector.push_back(group);
1065                                 }
1066                                 else if(type == "REVERSE"){
1067                                         Sequence oligoRC("reverse", oligo);
1068                                         oligoRC.reverseComplement();
1069                                         revPrimer.push_back(oligoRC.getUnaligned());
1070                                 }
1071                                 else if(type == "BARCODE"){
1072                                         inOligos >> group;
1073                                         
1074                                         //check for repeat barcodes
1075                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1076                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1077                                                 
1078                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1079                                         barcodeNameVector.push_back(group);
1080                                 }
1081                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
1082                         }
1083                         m->gobble(inOligos);
1084                 }       
1085                 inOligos.close();
1086                 
1087                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1088                 
1089                 //add in potential combos
1090                 if(barcodeNameVector.size() == 0){
1091                         barcodes[""] = 0;
1092                         barcodeNameVector.push_back("");                        
1093                 }
1094                 
1095                 if(primerNameVector.size() == 0){
1096                         primers[""] = 0;
1097                         primerNameVector.push_back("");                 
1098                 }
1099                 
1100                 fastaFileNames.resize(barcodeNameVector.size());
1101                 for(int i=0;i<fastaFileNames.size();i++){
1102                         fastaFileNames[i].assign(primerNameVector.size(), "");
1103                 }
1104                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1105                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1106                 
1107                 if(allFiles){
1108                         set<string> uniqueNames; //used to cleanup outputFileNames
1109                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1110                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1111                                         
1112                                         string primerName = primerNameVector[itPrimer->second];
1113                                         string barcodeName = barcodeNameVector[itBar->second];
1114                                         
1115                                         string comboGroupName = "";
1116                                         string fastaFileName = "";
1117                                         string qualFileName = "";
1118                                         string nameFileName = "";
1119                                         
1120                                         if(primerName == ""){
1121                                                 comboGroupName = barcodeNameVector[itBar->second];
1122                                         }
1123                                         else{
1124                                                 if(barcodeName == ""){
1125                                                         comboGroupName = primerNameVector[itPrimer->second];
1126                                                 }
1127                                                 else{
1128                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1129                                                 }
1130                                         }
1131                                         
1132                                         
1133                                         ofstream temp;
1134                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1135                                         if (uniqueNames.count(fastaFileName) == 0) {
1136                                                 outputNames.push_back(fastaFileName);
1137                                                 outputTypes["fasta"].push_back(fastaFileName);
1138                                                 uniqueNames.insert(fastaFileName);
1139                                         }
1140                                         
1141                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1142                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1143                                         
1144                                         if(qFileName != ""){
1145                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1146                                                 if (uniqueNames.count(fastaFileName) == 0) {
1147                                                         outputNames.push_back(qualFileName);
1148                                                         outputTypes["qfile"].push_back(qualFileName);
1149                                                 }
1150                                                 
1151                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1152                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1153                                         }
1154                                         
1155                                         if(nameFile != ""){
1156                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1157                                                 if (uniqueNames.count(fastaFileName) == 0) {
1158                                                         outputNames.push_back(nameFileName);
1159                                                         outputTypes["name"].push_back(nameFileName);
1160                                                 }
1161                                                 
1162                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1163                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1164                                         }
1165                                         
1166                                 }
1167                         }
1168                 }
1169                 numFPrimers = primers.size();
1170                 numRPrimers = revPrimer.size();
1171
1172         }
1173         catch(exception& e) {
1174                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1175                 exit(1);
1176         }
1177 }
1178
1179 //***************************************************************************************************************
1180
1181 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
1182         try {
1183                 
1184                 string rawSequence = seq.getUnaligned();
1185                 int success = bdiffs + 1;       //guilty until proven innocent
1186                 
1187                 //can you find the barcode
1188                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1189                         string oligo = it->first;
1190                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
1191                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1192                                 break;  
1193                         }
1194                         
1195                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1196                                 group = it->second;
1197                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1198                                 
1199                                 if(qual.getName() != ""){
1200                                         qual.trimQScores(oligo.length(), -1);
1201                                 }
1202                                 
1203                                 success = 0;
1204                                 break;
1205                         }
1206                 }
1207                 
1208                 //if you found the barcode or if you don't want to allow for diffs
1209                 if ((bdiffs == 0) || (success == 0)) { return success;  }
1210                 
1211                 else { //try aligning and see if you can find it
1212
1213                         int maxLength = 0;
1214
1215                         Alignment* alignment;
1216                         if (barcodes.size() > 0) {
1217                                 map<string,int>::iterator it=barcodes.begin();
1218
1219                                 for(it;it!=barcodes.end();it++){
1220                                         if(it->first.length() > maxLength){
1221                                                 maxLength = it->first.length();
1222                                         }
1223                                 }
1224                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
1225
1226                         }else{ alignment = NULL; } 
1227                         
1228                         //can you find the barcode
1229                         int minDiff = 1e6;
1230                         int minCount = 1;
1231                         int minGroup = -1;
1232                         int minPos = 0;
1233                         
1234                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1235                                 string oligo = it->first;
1236 //                              int length = oligo.length();
1237                                 
1238                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
1239                                         success = bdiffs + 10;
1240                                         break;
1241                                 }
1242                                 
1243                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1244                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1245                                 oligo = alignment->getSeqAAln();
1246                                 string temp = alignment->getSeqBAln();
1247                 
1248                                 int alnLength = oligo.length();
1249                                 
1250                                 for(int i=oligo.length()-1;i>=0;i--){
1251                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1252                                 }
1253                                 oligo = oligo.substr(0,alnLength);
1254                                 temp = temp.substr(0,alnLength);
1255                                 
1256                                 int numDiff = countDiffs(oligo, temp);
1257                                 
1258                                 if(numDiff < minDiff){
1259                                         minDiff = numDiff;
1260                                         minCount = 1;
1261                                         minGroup = it->second;
1262                                         minPos = 0;
1263                                         for(int i=0;i<alnLength;i++){
1264                                                 if(temp[i] != '-'){
1265                                                         minPos++;
1266                                                 }
1267                                         }
1268                                 }
1269                                 else if(numDiff == minDiff){
1270                                         minCount++;
1271                                 }
1272
1273                         }
1274
1275                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
1276                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1277                         else{                                                                                                   //use the best match
1278                                 group = minGroup;
1279                                 seq.setUnaligned(rawSequence.substr(minPos));
1280                                 
1281                                 if(qual.getName() != ""){
1282                                         qual.trimQScores(minPos, -1);
1283                                 }
1284                                 success = minDiff;
1285                         }
1286                         
1287                         if (alignment != NULL) {  delete alignment;  }
1288                         
1289                 }
1290                 
1291                 return success;
1292                 
1293         }
1294         catch(exception& e) {
1295                 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
1296                 exit(1);
1297         }
1298
1299 }
1300
1301 //***************************************************************************************************************
1302
1303 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
1304         try {
1305                 string rawSequence = seq.getUnaligned();
1306                 int success = pdiffs + 1;       //guilty until proven innocent
1307                 
1308                 //can you find the primer
1309                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1310                         string oligo = it->first;
1311                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
1312                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1313                                 break;  
1314                         }
1315                         
1316                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1317                                 group = it->second;
1318                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1319                                 if(qual.getName() != ""){
1320                                         qual.trimQScores(oligo.length(), -1);
1321                                 }
1322                                 success = 0;
1323                                 break;
1324                         }
1325                 }
1326
1327                 //if you found the barcode or if you don't want to allow for diffs
1328                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1329                 
1330                 else { //try aligning and see if you can find it
1331
1332                         int maxLength = 0;
1333
1334                         Alignment* alignment;
1335                         if (primers.size() > 0) {
1336                                 map<string,int>::iterator it=primers.begin();
1337
1338                                 for(it;it!=primers.end();it++){
1339                                         if(it->first.length() > maxLength){
1340                                                 maxLength = it->first.length();
1341                                         }
1342                                 }
1343                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
1344
1345                         }else{ alignment = NULL; } 
1346                         
1347                         //can you find the barcode
1348                         int minDiff = 1e6;
1349                         int minCount = 1;
1350                         int minGroup = -1;
1351                         int minPos = 0;
1352                         
1353                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1354                                 string oligo = it->first;
1355 //                              int length = oligo.length();
1356                                 
1357                                 if(rawSequence.length() < maxLength){   
1358                                         success = pdiffs + 100;
1359                                         break;
1360                                 }
1361                                 
1362                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1363                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1364                                 oligo = alignment->getSeqAAln();
1365                                 string temp = alignment->getSeqBAln();
1366                 
1367                                 int alnLength = oligo.length();
1368                                 
1369                                 for(int i=oligo.length()-1;i>=0;i--){
1370                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1371                                 }
1372                                 oligo = oligo.substr(0,alnLength);
1373                                 temp = temp.substr(0,alnLength);
1374                                 
1375                                 int numDiff = countDiffs(oligo, temp);
1376                                 
1377                                 if(numDiff < minDiff){
1378                                         minDiff = numDiff;
1379                                         minCount = 1;
1380                                         minGroup = it->second;
1381                                         minPos = 0;
1382                                         for(int i=0;i<alnLength;i++){
1383                                                 if(temp[i] != '-'){
1384                                                         minPos++;
1385                                                 }
1386                                         }
1387                                 }
1388                                 else if(numDiff == minDiff){
1389                                         minCount++;
1390                                 }
1391
1392                         }
1393
1394                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1395                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1396                         else{                                                                                                   //use the best match
1397                                 group = minGroup;
1398                                 seq.setUnaligned(rawSequence.substr(minPos));
1399                                 if(qual.getName() != ""){
1400                                         qual.trimQScores(minPos, -1);
1401                                 }
1402                                 success = minDiff;
1403                         }
1404                         
1405                         if (alignment != NULL) {  delete alignment;  }
1406                         
1407                 }
1408                 
1409                 return success;
1410
1411         }
1412         catch(exception& e) {
1413                 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1414                 exit(1);
1415         }
1416 }
1417
1418 //***************************************************************************************************************
1419
1420 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1421         try {
1422                 string rawSequence = seq.getUnaligned();
1423                 bool success = 0;       //guilty until proven innocent
1424                 
1425                 for(int i=0;i<numRPrimers;i++){
1426                         string oligo = revPrimer[i];
1427                         
1428                         if(rawSequence.length() < oligo.length()){
1429                                 success = 0;
1430                                 break;
1431                         }
1432                         
1433                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1434                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1435                                 if(qual.getName() != ""){
1436                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
1437                                 }
1438                                 success = 1;
1439                                 break;
1440                         }
1441                 }       
1442                 return success;
1443                 
1444         }
1445         catch(exception& e) {
1446                 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1447                 exit(1);
1448         }
1449 }
1450
1451 //***************************************************************************************************************
1452
1453 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1454         try {
1455                 bool success = 1;
1456                 if(qscores.getName() != ""){
1457                         qscores.trimQScores(-1, keepFirst);
1458                 }
1459                 sequence.trim(keepFirst);
1460                 return success;
1461         }
1462         catch(exception& e) {
1463                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1464                 exit(1);
1465         }
1466         
1467 }       
1468
1469 //***************************************************************************************************************
1470
1471 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1472         try {
1473                 bool success = 0;
1474                 
1475                 int length = sequence.getNumBases() - removeLast;
1476                 
1477                 if(length > 0){
1478                         if(qscores.getName() != ""){
1479                                 qscores.trimQScores(-1, length);
1480                         }
1481                         sequence.trim(length);
1482                         success = 1;
1483                 }
1484                 else{
1485                         success = 0;
1486                 }
1487
1488                 return success;
1489         }
1490         catch(exception& e) {
1491                 m->errorOut(e, "removeLastTrim", "countDiffs");
1492                 exit(1);
1493         }
1494         
1495 }       
1496
1497 //***************************************************************************************************************
1498
1499 bool TrimSeqsCommand::cullLength(Sequence& seq){
1500         try {
1501         
1502                 int length = seq.getNumBases();
1503                 bool success = 0;       //guilty until proven innocent
1504                 
1505                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1506                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1507                 else                                                                                            {       success = 0;    }
1508                 
1509                 return success;
1510         
1511         }
1512         catch(exception& e) {
1513                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1514                 exit(1);
1515         }
1516         
1517 }
1518
1519 //***************************************************************************************************************
1520
1521 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1522         try {
1523                 int longHomoP = seq.getLongHomoPolymer();
1524                 bool success = 0;       //guilty until proven innocent
1525                 
1526                 if(longHomoP <= maxHomoP){      success = 1;    }
1527                 else                                    {       success = 0;    }
1528                 
1529                 return success;
1530         }
1531         catch(exception& e) {
1532                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1533                 exit(1);
1534         }
1535         
1536 }
1537
1538 //***************************************************************************************************************
1539
1540 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1541         try {
1542                 int numNs = seq.getAmbigBases();
1543                 bool success = 0;       //guilty until proven innocent
1544                 
1545                 if(numNs <= maxAmbig)   {       success = 1;    }
1546                 else                                    {       success = 0;    }
1547                 
1548                 return success;
1549         }
1550         catch(exception& e) {
1551                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1552                 exit(1);
1553         }
1554         
1555 }
1556
1557 //***************************************************************************************************************
1558
1559 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1560         try {
1561                 bool success = 1;
1562                 int length = oligo.length();
1563                 
1564                 for(int i=0;i<length;i++){
1565                         
1566                         if(oligo[i] != seq[i]){
1567                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
1568                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
1569                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
1570                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
1571                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
1572                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
1573                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
1574                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
1575                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1576                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1577                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
1578                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
1579                                 
1580                                 if(success == 0)        {       break;   }
1581                         }
1582                         else{
1583                                 success = 1;
1584                         }
1585                 }
1586                 
1587                 return success;
1588         }
1589         catch(exception& e) {
1590                 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1591                 exit(1);
1592         }
1593
1594 }
1595
1596 //***************************************************************************************************************
1597
1598 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1599         try {
1600
1601                 int length = oligo.length();
1602                 int countDiffs = 0;
1603                 
1604                 for(int i=0;i<length;i++){
1605                                                                 
1606                         if(oligo[i] != seq[i]){
1607                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
1608                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
1609                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
1610                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
1611                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
1612                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
1613                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
1614                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
1615                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1616                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1617                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
1618                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
1619                         }
1620                         
1621                 }
1622                 
1623                 return countDiffs;
1624         }
1625         catch(exception& e) {
1626                 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1627                 exit(1);
1628         }
1629
1630 }
1631
1632 //***************************************************************************************************************