5 * Created by Pat Schloss on 6/6/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
13 //**********************************************************************************************************************
15 vector<string> TrimSeqsCommand::getValidParameters(){
17 string Array[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile",
18 "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage",
19 "keepfirst", "removelast",
20 "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
21 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
25 m->errorOut(e, "TrimSeqsCommand", "getValidParameters");
30 //**********************************************************************************************************************
32 TrimSeqsCommand::TrimSeqsCommand(){
35 //initialize outputTypes
36 vector<string> tempOutNames;
37 outputTypes["fasta"] = tempOutNames;
38 outputTypes["qual"] = tempOutNames;
39 outputTypes["group"] = tempOutNames;
42 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
47 //**********************************************************************************************************************
49 vector<string> TrimSeqsCommand::getRequiredParameters(){
51 string Array[] = {"fasta"};
52 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
56 m->errorOut(e, "TrimSeqsCommand", "getRequiredParameters");
61 //**********************************************************************************************************************
63 vector<string> TrimSeqsCommand::getRequiredFiles(){
65 vector<string> myArray;
69 m->errorOut(e, "TrimSeqsCommand", "getRequiredFiles");
74 //***************************************************************************************************************
76 TrimSeqsCommand::TrimSeqsCommand(string option) {
82 //allow user to run help
83 if(option == "help") { help(); abort = true; }
86 //valid paramters for this command
87 string AlignArray[] = { "fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile",
88 "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage",
89 "keepfirst", "removelast",
90 "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
92 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
94 OptionParser parser(option);
95 map<string,string> parameters = parser.getParameters();
97 ValidParameters validParameter;
98 map<string,string>::iterator it;
100 //check to make sure all parameters are valid for command
101 for (it = parameters.begin(); it != parameters.end(); it++) {
102 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
105 //initialize outputTypes
106 vector<string> tempOutNames;
107 outputTypes["fasta"] = tempOutNames;
108 outputTypes["qual"] = tempOutNames;
109 outputTypes["group"] = tempOutNames;
111 //if the user changes the input directory command factory will send this info to us in the output parameter
112 string inputDir = validParameter.validFile(parameters, "inputdir", false);
113 if (inputDir == "not found"){ inputDir = ""; }
116 it = parameters.find("fasta");
117 //user has given a template file
118 if(it != parameters.end()){
119 path = m->hasPath(it->second);
120 //if the user has not given a path then, add inputdir. else leave path alone.
121 if (path == "") { parameters["fasta"] = inputDir + it->second; }
124 it = parameters.find("oligos");
125 //user has given a template file
126 if(it != parameters.end()){
127 path = m->hasPath(it->second);
128 //if the user has not given a path then, add inputdir. else leave path alone.
129 if (path == "") { parameters["oligos"] = inputDir + it->second; }
132 it = parameters.find("qfile");
133 //user has given a template file
134 if(it != parameters.end()){
135 path = m->hasPath(it->second);
136 //if the user has not given a path then, add inputdir. else leave path alone.
137 if (path == "") { parameters["qfile"] = inputDir + it->second; }
140 it = parameters.find("group");
141 //user has given a template file
142 if(it != parameters.end()){
143 path = m->hasPath(it->second);
144 //if the user has not given a path then, add inputdir. else leave path alone.
145 if (path == "") { parameters["group"] = inputDir + it->second; }
150 //check for required parameters
151 fastaFile = validParameter.validFile(parameters, "fasta", true);
152 if (fastaFile == "not found") { m->mothurOut("fasta is a required parameter for the trim.seqs command."); m->mothurOutEndLine(); abort = true; }
153 else if (fastaFile == "not open") { abort = true; }
155 //if the user changes the output directory command factory will send this info to us in the output parameter
156 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
158 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it
162 //check for optional parameter and set defaults
163 // ...at some point should added some additional type checking...
165 temp = validParameter.validFile(parameters, "flip", false);
166 if (temp == "not found"){ flip = 0; }
167 else if(m->isTrue(temp)) { flip = 1; }
169 temp = validParameter.validFile(parameters, "oligos", true);
170 if (temp == "not found"){ oligoFile = ""; }
171 else if(temp == "not open"){ abort = true; }
172 else { oligoFile = temp; }
174 temp = validParameter.validFile(parameters, "group", true);
175 if (temp == "not found"){ groupfile = ""; }
176 else if(temp == "not open"){ abort = true; }
177 else { groupfile = temp; }
179 temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
180 convert(temp, maxAmbig);
182 temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "0"; }
183 convert(temp, maxHomoP);
185 temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "0"; }
186 convert(temp, minLength);
188 temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; }
189 convert(temp, maxLength);
191 temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
192 convert(temp, bdiffs);
194 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
195 convert(temp, pdiffs);
197 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs; temp = toString(tempTotal); }
198 convert(temp, tdiffs);
200 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs; }
202 temp = validParameter.validFile(parameters, "qfile", true);
203 if (temp == "not found") { qFileName = ""; }
204 else if(temp == "not open") { abort = true; }
205 else { qFileName = temp; }
207 temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; }
208 convert(temp, qThreshold);
210 temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "t"; }
211 qtrim = m->isTrue(temp);
213 temp = validParameter.validFile(parameters, "rollaverage", false); if (temp == "not found") { temp = "0"; }
214 convert(temp, qRollAverage);
216 temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
217 convert(temp, qWindowAverage);
219 temp = validParameter.validFile(parameters, "qwindowsize", false); if (temp == "not found") { temp = "50"; }
220 convert(temp, qWindowSize);
222 temp = validParameter.validFile(parameters, "qstepsize", false); if (temp == "not found") { temp = "1"; }
223 convert(temp, qWindowStep);
225 temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; }
226 convert(temp, qAverage);
228 temp = validParameter.validFile(parameters, "keepfirst", false); if (temp == "not found") { temp = "0"; }
229 convert(temp, keepFirst);
231 temp = validParameter.validFile(parameters, "removelast", false); if (temp == "not found") { temp = "0"; }
232 convert(temp, removeLast);
234 temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
235 allFiles = m->isTrue(temp);
237 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; }
238 convert(temp, processors);
240 if ((oligoFile != "") && (groupfile != "")) {
241 m->mothurOut("You given both a oligos file and a groupfile, only one is allowed."); m->mothurOutEndLine(); abort = true;
245 if(allFiles && (oligoFile == "") && (groupfile == "")){
246 m->mothurOut("You selected allfiles, but didn't enter an oligos or group file. Ignoring the allfiles request."); m->mothurOutEndLine();
248 if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
249 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
253 if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){
254 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
260 catch(exception& e) {
261 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
266 //**********************************************************************************************************************
268 void TrimSeqsCommand::help(){
270 m->mothurOut("The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n");
271 m->mothurOut("The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n");
272 m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, group, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n");
273 m->mothurOut("The fasta parameter is required.\n");
274 m->mothurOut("The group parameter allows you to enter a group file for your fasta file.\n");
275 m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n");
276 m->mothurOut("The oligos parameter allows you to provide an oligos file.\n");
277 m->mothurOut("The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n");
278 m->mothurOut("The maxhomop parameter allows you to set a maximum homopolymer length. \n");
279 m->mothurOut("The minlength parameter allows you to set and minimum sequence length. \n");
280 m->mothurOut("The maxlength parameter allows you to set and maximum sequence length. \n");
281 m->mothurOut("The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n");
282 m->mothurOut("The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n");
283 m->mothurOut("The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n");
284 m->mothurOut("The qfile parameter allows you to provide a quality file.\n");
285 m->mothurOut("The qthreshold parameter allows you to set a minimum quality score allowed. \n");
286 m->mothurOut("The qaverage parameter allows you to set a minimum average quality score allowed. \n");
287 m->mothurOut("The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n");
288 m->mothurOut("The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n");
289 m->mothurOut("The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n");
290 m->mothurOut("The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n");
291 m->mothurOut("The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n");
292 m->mothurOut("The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n");
293 m->mothurOut("The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n");
294 m->mothurOut("The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n");
295 m->mothurOut("The trim.seqs command should be in the following format: \n");
296 m->mothurOut("trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n");
297 m->mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n");
298 m->mothurOut("Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n");
299 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
300 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
303 catch(exception& e) {
304 m->errorOut(e, "TrimSeqsCommand", "help");
310 //***************************************************************************************************************
312 TrimSeqsCommand::~TrimSeqsCommand(){ /* do nothing */ }
314 //***************************************************************************************************************
316 int TrimSeqsCommand::execute(){
319 if (abort == true) { return 0; }
321 numFPrimers = 0; //this needs to be initialized
323 vector<string> fastaFileNames;
324 vector<string> qualFileNames;
326 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
327 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
328 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
329 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
330 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
331 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
332 if (qFileName != "") { outputNames.push_back(trimQualFile); outputNames.push_back(scrapQualFile); outputTypes["qual"].push_back(trimQualFile); outputTypes["qual"].push_back(scrapQualFile); }
333 string groupFile = "";
334 if (groupfile == "") { groupFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups"; }
336 groupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + "trim.groups";
337 outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile);
338 groupMap = new GroupMap(groupfile);
342 for (int i = 0; i < groupMap->namesOfGroups.size(); i++) {
343 groupToIndex[groupMap->namesOfGroups[i]] = i;
344 groupVector.push_back(groupMap->namesOfGroups[i]);
345 fastaFileNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + groupMap->namesOfGroups[i] + ".fasta"));
347 //we append later, so we want to clear file
349 m->openOutputFile(fastaFileNames[i], outRemove);
352 qualFileNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupMap->namesOfGroups[i] + ".qual"));
354 m->openOutputFile(qualFileNames[i], outRemove2);
359 comboStarts = fastaFileNames.size()-1;
363 outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile);
364 getOligos(fastaFileNames, qualFileNames);
367 vector<unsigned long int> fastaFilePos;
368 vector<unsigned long int> qFilePos;
370 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
372 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
373 lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
374 if (qFileName != "") { qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)])); }
376 if(qFileName == "") { qLines = lines; } //files with duds
378 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
380 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
382 createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames);
385 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
388 if (m->control_pressed) { return 0; }
391 for(int i=0;i<fastaFileNames.size();i++){
392 if (m->isBlank(fastaFileNames[i])) { blanks.insert(fastaFileNames[i]); }
393 else if (filesToRemove.count(fastaFileNames[i]) > 0) { remove(fastaFileNames[i].c_str()); }
397 m->openInputFile(fastaFileNames[i], inFASTA);
399 string outGroupFilename = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])) + "groups";
401 //if the fastafile is on the blanks list then the groups file should be as well
402 if (blanks.count(fastaFileNames[i]) != 0) { blanks.insert(outGroupFilename); }
404 m->openOutputFile(outGroupFilename, outGroups);
405 outputNames.push_back(outGroupFilename); outputTypes["group"].push_back(outGroupFilename);
407 string thisGroup = "";
408 if (i > comboStarts) {
409 map<string, int>::iterator itCombo;
410 for(itCombo=combos.begin();itCombo!=combos.end(); itCombo++){
411 if(itCombo->second == i){ thisGroup = itCombo->first; combos.erase(itCombo); break; }
413 }else{ thisGroup = groupVector[i]; }
415 while(!inFASTA.eof()){
416 if(inFASTA.get() == '>'){
418 outGroups << seqName << '\t' << thisGroup << endl;
420 while (!inFASTA.eof()) { char c = inFASTA.get(); if (c == 10 || c == 13){ break; } }
427 for (set<string>::iterator itBlanks = blanks.begin(); itBlanks != blanks.end(); itBlanks++) { remove((*(itBlanks)).c_str()); }
431 for(int i=0;i<qualFileNames.size();i++){
432 if (m->isBlank(qualFileNames[i])) { blanks.insert(qualFileNames[i]); }
433 else if (filesToRemove.count(qualFileNames[i]) > 0) { remove(qualFileNames[i].c_str()); }
437 for (set<string>::iterator itBlanks = blanks.begin(); itBlanks != blanks.end(); itBlanks++) { remove((*(itBlanks)).c_str()); }
439 if (m->control_pressed) {
440 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
444 m->mothurOutEndLine();
445 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
446 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
447 m->mothurOutEndLine();
452 catch(exception& e) {
453 m->errorOut(e, "TrimSeqsCommand", "execute");
458 /**************************************************************************************/
460 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector<string> fastaNames, vector<string> qualNames, linePair* line, linePair* qline) {
465 m->openOutputFile(trimFile, outFASTA);
468 m->openOutputFile(scrapFile, scrapFASTA);
473 m->openOutputFile(trimQFile, outQual);
474 m->openOutputFile(scrapQFile, scrapQual);
479 if (oligoFile != "") {
480 m->openOutputFile(groupFile, outGroups);
484 m->openInputFile(filename, inFASTA);
485 inFASTA.seekg(line->start);
488 if(qFileName != "") { m->openInputFile(qFileName, qFile); qFile.seekg(qline->start); }
491 for (int i = 0; i < fastaNames.size(); i++) { //clears old file
493 m->openOutputFile(fastaNames[i], temp);
496 for (int i = 0; i < qualNames.size(); i++) { //clears old file
498 m->openOutputFile(qualNames[i], temp);
508 if (m->control_pressed) {
509 inFASTA.close(); outFASTA.close(); scrapFASTA.close();
510 if (oligoFile != "") { outGroups.close(); }
515 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
523 Sequence currSeq(inFASTA); m->gobble(inFASTA);
525 QualityScores currQual;
527 currQual = QualityScores(qFile, currSeq.getNumBases()); m->gobble(qFile);
530 string origSeq = currSeq.getUnaligned();
532 int groupBar, groupPrime;
533 string trashCode = "";
534 int currentSeqsDiffs = 0;
536 if(barcodes.size() != 0){
537 success = stripBarcode(currSeq, currQual, groupBar);
538 if(success > bdiffs) { trashCode += 'b'; }
539 else{ currentSeqsDiffs += success; }
542 if(numFPrimers != 0){
543 success = stripForward(currSeq, currQual, groupPrime);
544 if(success > pdiffs) { trashCode += 'f'; }
545 else{ currentSeqsDiffs += success; }
548 if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
550 if(numRPrimers != 0){
551 success = stripReverse(currSeq, currQual);
552 if(!success) { trashCode += 'r'; }
556 success = keepFirstTrim(currSeq, currQual);
560 success = removeLastTrim(currSeq, currQual);
561 if(!success) { trashCode += 'l'; }
566 int origLength = currSeq.getNumBases();
568 if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); }
569 else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); }
570 else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); }
571 else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); }
572 else { success = 1; }
574 //you don't want to trim, if it fails above then scrap it
575 if ((!qtrim) && (origLength != currSeq.getNumBases())) { success = 0; }
577 if(!success) { trashCode += 'q'; }
580 if(minLength > 0 || maxLength > 0){
581 success = cullLength(currSeq);
582 if(!success) { trashCode += 'l'; }
585 success = cullHomoP(currSeq);
586 if(!success) { trashCode += 'h'; }
589 success = cullAmbigs(currSeq);
590 if(!success) { trashCode += 'n'; }
593 if(flip){ // should go last
594 currSeq.reverseComplement();
595 currQual.flipQScores();
598 if(trashCode.length() == 0){
599 currSeq.setAligned(currSeq.getUnaligned());
600 currSeq.printSequence(outFASTA);
601 currQual.printQScores(outQual);
603 if(barcodes.size() != 0){
604 string thisGroup = groupVector[groupBar];
605 int indexToFastaFile = groupBar;
606 if (primers.size() != 0){
607 //does this primer have a group
608 if (groupVector[groupPrime] != "") {
609 thisGroup += "." + groupVector[groupPrime];
610 indexToFastaFile = combos[thisGroup];
613 outGroups << currSeq.getName() << '\t' << thisGroup << endl;
616 m->openOutputFileAppend(fastaNames[indexToFastaFile], outTemp);
617 //currSeq.printSequence(*fastaFileNames[indexToFastaFile]);
618 currSeq.printSequence(outTemp);
622 //currQual.printQScores(*qualFileNames[indexToFastaFile]);
624 m->openOutputFileAppend(qualNames[indexToFastaFile], outTemp2);
625 currQual.printQScores(outTemp2);
631 if (groupfile != "") {
632 string thisGroup = groupMap->getGroup(currSeq.getName());
634 if (thisGroup != "not found") {
635 outGroups << currSeq.getName() << '\t' << thisGroup << endl;
638 m->openOutputFileAppend(fastaNames[groupToIndex[thisGroup]], outTemp);
639 currSeq.printSequence(outTemp);
643 m->openOutputFileAppend(qualNames[groupToIndex[thisGroup]], outTemp2);
644 currQual.printQScores(outTemp2);
649 m->mothurOut(currSeq.getName() + " is not in your groupfile, adding to group XXX."); m->mothurOutEndLine();
650 outGroups << currSeq.getName() << '\t' << "XXX" << endl;
652 m->mothurOut("[ERROR]: " + currSeq.getName() + " will not be added to any .group.fasta or .group.qual file."); m->mothurOutEndLine();
658 currSeq.setName(currSeq.getName() + '|' + trashCode);
659 currSeq.setUnaligned(origSeq);
660 currSeq.setAligned(origSeq);
661 currSeq.printSequence(scrapFASTA);
662 currQual.printQScores(scrapQual);
667 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
668 unsigned long int pos = inFASTA.tellg();
669 if ((pos == -1) || (pos >= line->end)) { break; }
671 if (inFASTA.eof()) { break; }
675 if((count) % 1000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
679 if((count) % 1000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
685 if (oligoFile != "") { outGroups.close(); }
686 if(qFileName != "") { qFile.close(); scrapQual.close(); outQual.close(); }
690 catch(exception& e) {
691 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
696 /**************************************************************************************************/
698 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector<string> fastaNames, vector<string> qualNames) {
700 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
705 //loop through and create all the processes you want
706 while (process != processors) {
710 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
713 for (int i = 0; i < fastaNames.size(); i++) {
714 fastaNames[i] = (fastaNames[i] + toString(getpid()) + ".temp");
715 //clear old file if it exists
717 m->openOutputFile(fastaNames[i], temp);
720 qualNames[i] = (qualNames[i] + toString(getpid()) + ".temp");
721 //clear old file if it exists
723 m->openOutputFile(qualNames[i], temp2);
728 driverCreateTrim(filename, qFileName, (trimFile + toString(getpid()) + ".temp"), (scrapFile + toString(getpid()) + ".temp"), (trimQFile + toString(getpid()) + ".temp"), (scrapQFile + toString(getpid()) + ".temp"), (groupFile + toString(getpid()) + ".temp"), fastaNames, qualNames, lines[process], qLines[process]);
731 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
732 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
738 for (int i = 0; i < fastaNames.size(); i++) {
739 //clear old file if it exists
741 m->openOutputFile(fastaNames[i], temp);
744 //clear old file if it exists
746 m->openOutputFile(qualNames[i], temp2);
751 driverCreateTrim(filename, qFileName, trimFile, scrapFile, trimQFile, scrapQFile, groupFile, fastaNames, qualNames, lines[0], qLines[0]);
754 //force parent to wait until all the processes are done
755 for (int i=0;i<processIDS.size();i++) {
756 int temp = processIDS[i];
761 for(int i=0;i<processIDS.size();i++){
763 m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
765 m->appendFiles((trimFile + toString(processIDS[i]) + ".temp"), trimFile);
766 remove((trimFile + toString(processIDS[i]) + ".temp").c_str());
767 m->appendFiles((scrapFile + toString(processIDS[i]) + ".temp"), scrapFile);
768 remove((scrapFile + toString(processIDS[i]) + ".temp").c_str());
770 m->mothurOut("Done with fasta files"); m->mothurOutEndLine();
773 m->appendFiles((trimQFile + toString(processIDS[i]) + ".temp"), trimQFile);
774 remove((trimQFile + toString(processIDS[i]) + ".temp").c_str());
775 m->appendFiles((scrapQFile + toString(processIDS[i]) + ".temp"), scrapQFile);
776 remove((scrapQFile + toString(processIDS[i]) + ".temp").c_str());
778 m->mothurOut("Done with quality files"); m->mothurOutEndLine();
781 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
782 remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
784 m->mothurOut("Done with group file"); m->mothurOutEndLine();
786 for (int j = 0; j < fastaNames.size(); j++) {
787 m->appendFiles((fastaNames[j] + toString(processIDS[i]) + ".temp"), fastaNames[j]);
788 remove((fastaNames[j] + toString(processIDS[i]) + ".temp").c_str());
792 for (int j = 0; j < qualNames.size(); j++) {
793 m->appendFiles((qualNames[j] + toString(processIDS[i]) + ".temp"), qualNames[j]);
794 remove((qualNames[j] + toString(processIDS[i]) + ".temp").c_str());
798 if (allFiles) { m->mothurOut("Done with allfiles"); m->mothurOutEndLine(); }
804 catch(exception& e) {
805 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
810 /**************************************************************************************************/
812 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
815 //set file positions for fasta file
816 fastaFilePos = m->divideFile(filename, processors);
818 if (qfilename == "") { return processors; }
820 //get name of first sequence in each chunk
821 map<string, int> firstSeqNames;
822 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
824 m->openInputFile(filename, in);
825 in.seekg(fastaFilePos[i]);
828 firstSeqNames[temp.getName()] = i;
833 //seach for filePos of each first name in the qfile and save in qfileFilePos
835 m->openInputFile(qfilename, inQual);
838 while(!inQual.eof()){
839 input = m->getline(inQual);
841 if (input.length() != 0) {
842 if(input[0] == '>'){ //this is a sequence name line
843 istringstream nameStream(input);
845 string sname = ""; nameStream >> sname;
846 sname = sname.substr(1);
848 map<string, int>::iterator it = firstSeqNames.find(sname);
850 if(it != firstSeqNames.end()) { //this is the start of a new chunk
851 unsigned long int pos = inQual.tellg();
852 qfileFilePos.push_back(pos - input.length() - 1);
853 firstSeqNames.erase(it);
858 if (firstSeqNames.size() == 0) { break; }
863 if (firstSeqNames.size() != 0) {
864 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
865 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
871 //get last file position of qfile
873 unsigned long int size;
875 //get num bytes in file
876 pFile = fopen (qfilename.c_str(),"rb");
877 if (pFile==NULL) perror ("Error opening file");
879 fseek (pFile, 0, SEEK_END);
884 qfileFilePos.push_back(size);
888 catch(exception& e) {
889 m->errorOut(e, "TrimSeqsCommand", "setLines");
894 //***************************************************************************************************************
896 void TrimSeqsCommand::getOligos(vector<string>& outFASTAVec, vector<string>& outQualVec){
899 m->openInputFile(oligoFile, inOligos);
903 string type, oligo, group;
905 //int indexPrimer = 0;
907 while(!inOligos.eof()){
908 inOligos >> type; m->gobble(inOligos);
911 while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
914 //make type case insensitive
915 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
919 for(int i=0;i<oligo.length();i++){
920 oligo[i] = toupper(oligo[i]);
921 if(oligo[i] == 'U') { oligo[i] = 'T'; }
924 if(type == "FORWARD"){
927 // get rest of line in case there is a primer name
928 while (!inOligos.eof()) {
929 char c = inOligos.get();
930 if (c == 10 || c == 13){ break; }
931 else if (c == 32 || c == 9){;} //space or tab
935 //check for repeat barcodes
936 map<string, int>::iterator itPrime = primers.find(oligo);
937 if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
939 primers[oligo]=index; index++;
940 groupVector.push_back(group);
943 outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
945 outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
947 if (group == "") { //if there is not a group for this primer, then this file will not get written to, but we add it to keep the indexes correct
948 filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
950 filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
953 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
954 outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
956 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
957 outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
963 else if(type == "REVERSE"){
964 Sequence oligoRC("reverse", oligo);
965 oligoRC.reverseComplement();
966 revPrimer.push_back(oligoRC.getUnaligned());
968 else if(type == "BARCODE"){
971 //check for repeat barcodes
972 map<string, int>::iterator itBar = barcodes.find(oligo);
973 if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
975 barcodes[oligo]=index; index++;
976 groupVector.push_back(group);
979 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
980 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
981 outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
983 outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
984 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
985 outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
989 }else{ m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
996 //add in potential combos
998 comboStarts = outFASTAVec.size()-1;
999 for (map<string, int>::iterator itBar = barcodes.begin(); itBar != barcodes.end(); itBar++) {
1000 for (map<string, int>::iterator itPrime = primers.begin(); itPrime != primers.end(); itPrime++) {
1001 if (groupVector[itPrime->second] != "") { //there is a group for this primer
1002 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
1003 outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
1004 outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
1005 combos[(groupVector[itBar->second] + "." + groupVector[itPrime->second])] = outFASTAVec.size()-1;
1007 if(qFileName != ""){
1008 outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
1009 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
1010 outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
1017 numFPrimers = primers.size();
1018 numRPrimers = revPrimer.size();
1021 catch(exception& e) {
1022 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1026 //***************************************************************************************************************
1028 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
1031 string rawSequence = seq.getUnaligned();
1032 int success = bdiffs + 1; //guilty until proven innocent
1034 //can you find the barcode
1035 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1036 string oligo = it->first;
1037 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
1038 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
1042 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1044 seq.setUnaligned(rawSequence.substr(oligo.length()));
1046 if(qual.getName() != ""){
1047 qual.trimQScores(oligo.length(), -1);
1055 //if you found the barcode or if you don't want to allow for diffs
1057 if ((bdiffs == 0) || (success == 0)) { return success; }
1059 else { //try aligning and see if you can find it
1064 Alignment* alignment;
1065 if (barcodes.size() > 0) {
1066 map<string,int>::iterator it=barcodes.begin();
1068 for(it;it!=barcodes.end();it++){
1069 if(it->first.length() > maxLength){
1070 maxLength = it->first.length();
1073 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));
1075 }else{ alignment = NULL; }
1077 //can you find the barcode
1083 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1084 string oligo = it->first;
1085 // int length = oligo.length();
1087 if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
1088 success = bdiffs + 10;
1092 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1093 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1094 oligo = alignment->getSeqAAln();
1095 string temp = alignment->getSeqBAln();
1097 int alnLength = oligo.length();
1099 for(int i=oligo.length()-1;i>=0;i--){
1100 if(oligo[i] != '-'){ alnLength = i+1; break; }
1102 oligo = oligo.substr(0,alnLength);
1103 temp = temp.substr(0,alnLength);
1105 int numDiff = countDiffs(oligo, temp);
1107 // cout << oligo << '\t' << temp << '\t' << numDiff << endl;
1109 if(numDiff < minDiff){
1112 minGroup = it->second;
1114 for(int i=0;i<alnLength;i++){
1120 else if(numDiff == minDiff){
1126 if(minDiff > bdiffs) { success = minDiff; } //no good matches
1127 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
1128 else{ //use the best match
1130 seq.setUnaligned(rawSequence.substr(minPos));
1132 if(qual.getName() != ""){
1133 qual.trimQScores(minPos, -1);
1138 if (alignment != NULL) { delete alignment; }
1141 // cout << success << endl;
1146 catch(exception& e) {
1147 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
1153 //***************************************************************************************************************
1155 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
1157 string rawSequence = seq.getUnaligned();
1158 int success = pdiffs + 1; //guilty until proven innocent
1160 //can you find the primer
1161 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1162 string oligo = it->first;
1163 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
1164 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1168 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1170 seq.setUnaligned(rawSequence.substr(oligo.length()));
1171 if(qual.getName() != ""){
1172 qual.trimQScores(oligo.length(), -1);
1179 //if you found the barcode or if you don't want to allow for diffs
1181 if ((pdiffs == 0) || (success == 0)) { return success; }
1183 else { //try aligning and see if you can find it
1188 Alignment* alignment;
1189 if (primers.size() > 0) {
1190 map<string,int>::iterator it=primers.begin();
1192 for(it;it!=primers.end();it++){
1193 if(it->first.length() > maxLength){
1194 maxLength = it->first.length();
1197 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));
1199 }else{ alignment = NULL; }
1201 //can you find the barcode
1207 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1208 string oligo = it->first;
1209 // int length = oligo.length();
1211 if(rawSequence.length() < maxLength){
1212 success = pdiffs + 100;
1216 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1217 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1218 oligo = alignment->getSeqAAln();
1219 string temp = alignment->getSeqBAln();
1221 int alnLength = oligo.length();
1223 for(int i=oligo.length()-1;i>=0;i--){
1224 if(oligo[i] != '-'){ alnLength = i+1; break; }
1226 oligo = oligo.substr(0,alnLength);
1227 temp = temp.substr(0,alnLength);
1229 int numDiff = countDiffs(oligo, temp);
1231 // cout << oligo << '\t' << temp << '\t' << numDiff << endl;
1233 if(numDiff < minDiff){
1236 minGroup = it->second;
1238 for(int i=0;i<alnLength;i++){
1244 else if(numDiff == minDiff){
1250 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1251 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
1252 else{ //use the best match
1254 seq.setUnaligned(rawSequence.substr(minPos));
1255 if(qual.getName() != ""){
1256 qual.trimQScores(minPos, -1);
1261 if (alignment != NULL) { delete alignment; }
1268 catch(exception& e) {
1269 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1274 //***************************************************************************************************************
1276 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1278 string rawSequence = seq.getUnaligned();
1279 bool success = 0; //guilty until proven innocent
1281 for(int i=0;i<numRPrimers;i++){
1282 string oligo = revPrimer[i];
1284 if(rawSequence.length() < oligo.length()){
1289 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1290 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1291 if(qual.getName() != ""){
1292 qual.trimQScores(-1, rawSequence.length()-oligo.length());
1301 catch(exception& e) {
1302 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1307 //***************************************************************************************************************
1309 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1312 if(qscores.getName() != ""){
1313 qscores.trimQScores(-1, keepFirst);
1315 sequence.trim(keepFirst);
1318 catch(exception& e) {
1319 m->errorOut(e, "keepFirstTrim", "countDiffs");
1325 //***************************************************************************************************************
1327 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1331 int length = sequence.getNumBases() - removeLast;
1334 if(qscores.getName() != ""){
1335 qscores.trimQScores(-1, length);
1337 sequence.trim(length);
1346 catch(exception& e) {
1347 m->errorOut(e, "removeLastTrim", "countDiffs");
1353 //***************************************************************************************************************
1355 bool TrimSeqsCommand::cullLength(Sequence& seq){
1358 int length = seq.getNumBases();
1359 bool success = 0; //guilty until proven innocent
1361 if(length >= minLength && maxLength == 0) { success = 1; }
1362 else if(length >= minLength && length <= maxLength) { success = 1; }
1363 else { success = 0; }
1368 catch(exception& e) {
1369 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1375 //***************************************************************************************************************
1377 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1379 int longHomoP = seq.getLongHomoPolymer();
1380 bool success = 0; //guilty until proven innocent
1382 if(longHomoP <= maxHomoP){ success = 1; }
1383 else { success = 0; }
1387 catch(exception& e) {
1388 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1394 //***************************************************************************************************************
1396 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1398 int numNs = seq.getAmbigBases();
1399 bool success = 0; //guilty until proven innocent
1401 if(numNs <= maxAmbig) { success = 1; }
1402 else { success = 0; }
1406 catch(exception& e) {
1407 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1413 //***************************************************************************************************************
1415 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1418 int length = oligo.length();
1420 for(int i=0;i<length;i++){
1422 if(oligo[i] != seq[i]){
1423 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C') { success = 0; }
1424 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { success = 0; }
1425 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { success = 0; }
1426 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { success = 0; }
1427 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { success = 0; }
1428 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1429 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { success = 0; }
1430 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
1431 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1432 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1433 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { success = 0; }
1434 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
1436 if(success == 0) { break; }
1445 catch(exception& e) {
1446 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1452 //***************************************************************************************************************
1454 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1457 int length = oligo.length();
1460 for(int i=0;i<length;i++){
1462 if(oligo[i] != seq[i]){
1463 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.') { countDiffs++; }
1464 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { countDiffs++; }
1465 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { countDiffs++; }
1466 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { countDiffs++; }
1467 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { countDiffs++; }
1468 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1469 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { countDiffs++; }
1470 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
1471 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1472 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1473 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { countDiffs++; }
1474 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
1481 catch(exception& e) {
1482 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1488 //***************************************************************************************************************