5 * Created by Pat Schloss on 6/6/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
13 //**********************************************************************************************************************
15 vector<string> TrimSeqsCommand::getValidParameters(){
17 string Array[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile",
18 "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage",
19 "keepfirst", "removelast",
20 "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
21 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
25 m->errorOut(e, "TrimSeqsCommand", "getValidParameters");
30 //**********************************************************************************************************************
32 TrimSeqsCommand::TrimSeqsCommand(){
34 abort = true; calledHelp = true;
35 vector<string> tempOutNames;
36 outputTypes["fasta"] = tempOutNames;
37 outputTypes["qual"] = tempOutNames;
38 outputTypes["group"] = tempOutNames;
41 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
46 //**********************************************************************************************************************
48 vector<string> TrimSeqsCommand::getRequiredParameters(){
50 string Array[] = {"fasta"};
51 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
55 m->errorOut(e, "TrimSeqsCommand", "getRequiredParameters");
60 //**********************************************************************************************************************
62 vector<string> TrimSeqsCommand::getRequiredFiles(){
64 vector<string> myArray;
68 m->errorOut(e, "TrimSeqsCommand", "getRequiredFiles");
73 //***************************************************************************************************************
75 TrimSeqsCommand::TrimSeqsCommand(string option) {
78 abort = false; calledHelp = false;
81 //allow user to run help
82 if(option == "help") { help(); abort = true; calledHelp = true; }
85 //valid paramters for this command
86 string AlignArray[] = { "fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile",
87 "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage",
88 "keepfirst", "removelast",
89 "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
91 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
93 OptionParser parser(option);
94 map<string,string> parameters = parser.getParameters();
96 ValidParameters validParameter;
97 map<string,string>::iterator it;
99 //check to make sure all parameters are valid for command
100 for (it = parameters.begin(); it != parameters.end(); it++) {
101 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
104 //initialize outputTypes
105 vector<string> tempOutNames;
106 outputTypes["fasta"] = tempOutNames;
107 outputTypes["qual"] = tempOutNames;
108 outputTypes["group"] = tempOutNames;
110 //if the user changes the input directory command factory will send this info to us in the output parameter
111 string inputDir = validParameter.validFile(parameters, "inputdir", false);
112 if (inputDir == "not found"){ inputDir = ""; }
115 it = parameters.find("fasta");
116 //user has given a template file
117 if(it != parameters.end()){
118 path = m->hasPath(it->second);
119 //if the user has not given a path then, add inputdir. else leave path alone.
120 if (path == "") { parameters["fasta"] = inputDir + it->second; }
123 it = parameters.find("oligos");
124 //user has given a template file
125 if(it != parameters.end()){
126 path = m->hasPath(it->second);
127 //if the user has not given a path then, add inputdir. else leave path alone.
128 if (path == "") { parameters["oligos"] = inputDir + it->second; }
131 it = parameters.find("qfile");
132 //user has given a template file
133 if(it != parameters.end()){
134 path = m->hasPath(it->second);
135 //if the user has not given a path then, add inputdir. else leave path alone.
136 if (path == "") { parameters["qfile"] = inputDir + it->second; }
139 it = parameters.find("group");
140 //user has given a template file
141 if(it != parameters.end()){
142 path = m->hasPath(it->second);
143 //if the user has not given a path then, add inputdir. else leave path alone.
144 if (path == "") { parameters["group"] = inputDir + it->second; }
149 //check for required parameters
150 fastaFile = validParameter.validFile(parameters, "fasta", true);
151 if (fastaFile == "not found") { m->mothurOut("fasta is a required parameter for the trim.seqs command."); m->mothurOutEndLine(); abort = true; }
152 else if (fastaFile == "not open") { abort = true; }
154 //if the user changes the output directory command factory will send this info to us in the output parameter
155 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
157 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it
161 //check for optional parameter and set defaults
162 // ...at some point should added some additional type checking...
164 temp = validParameter.validFile(parameters, "flip", false);
165 if (temp == "not found"){ flip = 0; }
166 else if(m->isTrue(temp)) { flip = 1; }
168 temp = validParameter.validFile(parameters, "oligos", true);
169 if (temp == "not found"){ oligoFile = ""; }
170 else if(temp == "not open"){ abort = true; }
171 else { oligoFile = temp; }
173 temp = validParameter.validFile(parameters, "group", true);
174 if (temp == "not found"){ groupfile = ""; }
175 else if(temp == "not open"){ abort = true; }
176 else { groupfile = temp; }
178 temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
179 convert(temp, maxAmbig);
181 temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "0"; }
182 convert(temp, maxHomoP);
184 temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "0"; }
185 convert(temp, minLength);
187 temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; }
188 convert(temp, maxLength);
190 temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
191 convert(temp, bdiffs);
193 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
194 convert(temp, pdiffs);
196 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs; temp = toString(tempTotal); }
197 convert(temp, tdiffs);
199 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs; }
201 temp = validParameter.validFile(parameters, "qfile", true);
202 if (temp == "not found") { qFileName = ""; }
203 else if(temp == "not open") { abort = true; }
204 else { qFileName = temp; }
206 temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; }
207 convert(temp, qThreshold);
209 temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "t"; }
210 qtrim = m->isTrue(temp);
212 temp = validParameter.validFile(parameters, "rollaverage", false); if (temp == "not found") { temp = "0"; }
213 convert(temp, qRollAverage);
215 temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
216 convert(temp, qWindowAverage);
218 temp = validParameter.validFile(parameters, "qwindowsize", false); if (temp == "not found") { temp = "50"; }
219 convert(temp, qWindowSize);
221 temp = validParameter.validFile(parameters, "qstepsize", false); if (temp == "not found") { temp = "1"; }
222 convert(temp, qWindowStep);
224 temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; }
225 convert(temp, qAverage);
227 temp = validParameter.validFile(parameters, "keepfirst", false); if (temp == "not found") { temp = "0"; }
228 convert(temp, keepFirst);
230 temp = validParameter.validFile(parameters, "removelast", false); if (temp == "not found") { temp = "0"; }
231 convert(temp, removeLast);
233 temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
234 allFiles = m->isTrue(temp);
236 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; }
237 convert(temp, processors);
239 if ((oligoFile != "") && (groupfile != "")) {
240 m->mothurOut("You given both a oligos file and a groupfile, only one is allowed."); m->mothurOutEndLine(); abort = true;
244 if(allFiles && (oligoFile == "") && (groupfile == "")){
245 m->mothurOut("You selected allfiles, but didn't enter an oligos or group file. Ignoring the allfiles request."); m->mothurOutEndLine();
247 if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
248 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
252 if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){
253 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
259 catch(exception& e) {
260 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
265 //**********************************************************************************************************************
267 void TrimSeqsCommand::help(){
269 m->mothurOut("The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n");
270 m->mothurOut("The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n");
271 m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, group, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n");
272 m->mothurOut("The fasta parameter is required.\n");
273 m->mothurOut("The group parameter allows you to enter a group file for your fasta file.\n");
274 m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n");
275 m->mothurOut("The oligos parameter allows you to provide an oligos file.\n");
276 m->mothurOut("The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n");
277 m->mothurOut("The maxhomop parameter allows you to set a maximum homopolymer length. \n");
278 m->mothurOut("The minlength parameter allows you to set and minimum sequence length. \n");
279 m->mothurOut("The maxlength parameter allows you to set and maximum sequence length. \n");
280 m->mothurOut("The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n");
281 m->mothurOut("The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n");
282 m->mothurOut("The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n");
283 m->mothurOut("The qfile parameter allows you to provide a quality file.\n");
284 m->mothurOut("The qthreshold parameter allows you to set a minimum quality score allowed. \n");
285 m->mothurOut("The qaverage parameter allows you to set a minimum average quality score allowed. \n");
286 m->mothurOut("The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n");
287 m->mothurOut("The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n");
288 m->mothurOut("The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n");
289 m->mothurOut("The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n");
290 m->mothurOut("The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n");
291 m->mothurOut("The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n");
292 m->mothurOut("The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n");
293 m->mothurOut("The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n");
294 m->mothurOut("The trim.seqs command should be in the following format: \n");
295 m->mothurOut("trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n");
296 m->mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n");
297 m->mothurOut("Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n");
298 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
299 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
302 catch(exception& e) {
303 m->errorOut(e, "TrimSeqsCommand", "help");
309 //***************************************************************************************************************
311 TrimSeqsCommand::~TrimSeqsCommand(){ /* do nothing */ }
313 //***************************************************************************************************************
315 int TrimSeqsCommand::execute(){
318 if (abort == true) { if (calledHelp) { return 0; } return 2; }
320 numFPrimers = 0; //this needs to be initialized
322 vector<string> fastaFileNames;
323 vector<string> qualFileNames;
325 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
326 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
327 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
328 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
329 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
330 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
331 if (qFileName != "") { outputNames.push_back(trimQualFile); outputNames.push_back(scrapQualFile); outputTypes["qual"].push_back(trimQualFile); outputTypes["qual"].push_back(scrapQualFile); }
332 string groupFile = "";
333 if (groupfile == "") { groupFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups"; }
335 groupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + "trim.groups";
336 outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile);
337 groupMap = new GroupMap(groupfile);
341 for (int i = 0; i < groupMap->namesOfGroups.size(); i++) {
342 groupToIndex[groupMap->namesOfGroups[i]] = i;
343 groupVector.push_back(groupMap->namesOfGroups[i]);
344 fastaFileNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + groupMap->namesOfGroups[i] + ".fasta"));
346 //we append later, so we want to clear file
348 m->openOutputFile(fastaFileNames[i], outRemove);
351 qualFileNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupMap->namesOfGroups[i] + ".qual"));
353 m->openOutputFile(qualFileNames[i], outRemove2);
358 comboStarts = fastaFileNames.size()-1;
362 outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile);
363 getOligos(fastaFileNames, qualFileNames);
366 vector<unsigned long int> fastaFilePos;
367 vector<unsigned long int> qFilePos;
369 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
371 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
372 lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
373 if (qFileName != "") { qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)])); }
375 if(qFileName == "") { qLines = lines; } //files with duds
377 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
379 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
381 createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames);
384 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
387 if (m->control_pressed) { return 0; }
390 for(int i=0;i<fastaFileNames.size();i++){
391 if (m->isBlank(fastaFileNames[i])) { blanks.insert(fastaFileNames[i]); }
392 else if (filesToRemove.count(fastaFileNames[i]) > 0) { remove(fastaFileNames[i].c_str()); }
396 m->openInputFile(fastaFileNames[i], inFASTA);
398 string outGroupFilename = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])) + "groups";
400 //if the fastafile is on the blanks list then the groups file should be as well
401 if (blanks.count(fastaFileNames[i]) != 0) { blanks.insert(outGroupFilename); }
403 m->openOutputFile(outGroupFilename, outGroups);
404 outputNames.push_back(outGroupFilename); outputTypes["group"].push_back(outGroupFilename);
406 string thisGroup = "";
407 if (i > comboStarts) {
408 map<string, int>::iterator itCombo;
409 for(itCombo=combos.begin();itCombo!=combos.end(); itCombo++){
410 if(itCombo->second == i){ thisGroup = itCombo->first; combos.erase(itCombo); break; }
412 }else{ thisGroup = groupVector[i]; }
414 while(!inFASTA.eof()){
415 if(inFASTA.get() == '>'){
417 outGroups << seqName << '\t' << thisGroup << endl;
419 while (!inFASTA.eof()) { char c = inFASTA.get(); if (c == 10 || c == 13){ break; } }
426 for (set<string>::iterator itBlanks = blanks.begin(); itBlanks != blanks.end(); itBlanks++) { remove((*(itBlanks)).c_str()); }
430 for(int i=0;i<qualFileNames.size();i++){
431 if (m->isBlank(qualFileNames[i])) { blanks.insert(qualFileNames[i]); }
432 else if (filesToRemove.count(qualFileNames[i]) > 0) { remove(qualFileNames[i].c_str()); }
436 for (set<string>::iterator itBlanks = blanks.begin(); itBlanks != blanks.end(); itBlanks++) { remove((*(itBlanks)).c_str()); }
438 if (m->control_pressed) {
439 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
443 m->mothurOutEndLine();
444 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
445 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
446 m->mothurOutEndLine();
451 catch(exception& e) {
452 m->errorOut(e, "TrimSeqsCommand", "execute");
457 /**************************************************************************************/
459 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector<string> fastaNames, vector<string> qualNames, linePair* line, linePair* qline) {
464 m->openOutputFile(trimFile, outFASTA);
467 m->openOutputFile(scrapFile, scrapFASTA);
472 m->openOutputFile(trimQFile, outQual);
473 m->openOutputFile(scrapQFile, scrapQual);
478 if (oligoFile != "") {
479 m->openOutputFile(groupFile, outGroups);
483 m->openInputFile(filename, inFASTA);
484 inFASTA.seekg(line->start);
487 if(qFileName != "") { m->openInputFile(qFileName, qFile); qFile.seekg(qline->start); }
490 for (int i = 0; i < fastaNames.size(); i++) { //clears old file
492 m->openOutputFile(fastaNames[i], temp);
495 for (int i = 0; i < qualNames.size(); i++) { //clears old file
497 m->openOutputFile(qualNames[i], temp);
507 if (m->control_pressed) {
508 inFASTA.close(); outFASTA.close(); scrapFASTA.close();
509 if (oligoFile != "") { outGroups.close(); }
514 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
522 Sequence currSeq(inFASTA); m->gobble(inFASTA);
524 QualityScores currQual;
526 currQual = QualityScores(qFile, currSeq.getNumBases()); m->gobble(qFile);
529 string origSeq = currSeq.getUnaligned();
531 int groupBar, groupPrime;
532 string trashCode = "";
533 int currentSeqsDiffs = 0;
535 if(barcodes.size() != 0){
536 success = stripBarcode(currSeq, currQual, groupBar);
537 if(success > bdiffs) { trashCode += 'b'; }
538 else{ currentSeqsDiffs += success; }
541 if(numFPrimers != 0){
542 success = stripForward(currSeq, currQual, groupPrime);
543 if(success > pdiffs) { trashCode += 'f'; }
544 else{ currentSeqsDiffs += success; }
547 if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
549 if(numRPrimers != 0){
550 success = stripReverse(currSeq, currQual);
551 if(!success) { trashCode += 'r'; }
555 success = keepFirstTrim(currSeq, currQual);
559 success = removeLastTrim(currSeq, currQual);
560 if(!success) { trashCode += 'l'; }
565 int origLength = currSeq.getNumBases();
567 if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); }
568 else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); }
569 else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); }
570 else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); }
571 else { success = 1; }
573 //you don't want to trim, if it fails above then scrap it
574 if ((!qtrim) && (origLength != currSeq.getNumBases())) { success = 0; }
576 if(!success) { trashCode += 'q'; }
579 if(minLength > 0 || maxLength > 0){
580 success = cullLength(currSeq);
581 if(!success) { trashCode += 'l'; }
584 success = cullHomoP(currSeq);
585 if(!success) { trashCode += 'h'; }
588 success = cullAmbigs(currSeq);
589 if(!success) { trashCode += 'n'; }
592 if(flip){ // should go last
593 currSeq.reverseComplement();
594 currQual.flipQScores();
597 if(trashCode.length() == 0){
598 currSeq.setAligned(currSeq.getUnaligned());
599 currSeq.printSequence(outFASTA);
600 currQual.printQScores(outQual);
602 if(barcodes.size() != 0){
603 string thisGroup = groupVector[groupBar];
604 int indexToFastaFile = groupBar;
605 if (primers.size() != 0){
606 //does this primer have a group
607 if (groupVector[groupPrime] != "") {
608 thisGroup += "." + groupVector[groupPrime];
609 indexToFastaFile = combos[thisGroup];
612 outGroups << currSeq.getName() << '\t' << thisGroup << endl;
615 m->openOutputFileAppend(fastaNames[indexToFastaFile], outTemp);
616 //currSeq.printSequence(*fastaFileNames[indexToFastaFile]);
617 currSeq.printSequence(outTemp);
621 //currQual.printQScores(*qualFileNames[indexToFastaFile]);
623 m->openOutputFileAppend(qualNames[indexToFastaFile], outTemp2);
624 currQual.printQScores(outTemp2);
630 if (groupfile != "") {
631 string thisGroup = groupMap->getGroup(currSeq.getName());
633 if (thisGroup != "not found") {
634 outGroups << currSeq.getName() << '\t' << thisGroup << endl;
637 m->openOutputFileAppend(fastaNames[groupToIndex[thisGroup]], outTemp);
638 currSeq.printSequence(outTemp);
642 m->openOutputFileAppend(qualNames[groupToIndex[thisGroup]], outTemp2);
643 currQual.printQScores(outTemp2);
648 m->mothurOut(currSeq.getName() + " is not in your groupfile, adding to group XXX."); m->mothurOutEndLine();
649 outGroups << currSeq.getName() << '\t' << "XXX" << endl;
651 m->mothurOut("[ERROR]: " + currSeq.getName() + " will not be added to any .group.fasta or .group.qual file."); m->mothurOutEndLine();
657 currSeq.setName(currSeq.getName() + '|' + trashCode);
658 currSeq.setUnaligned(origSeq);
659 currSeq.setAligned(origSeq);
660 currSeq.printSequence(scrapFASTA);
661 currQual.printQScores(scrapQual);
666 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
667 unsigned long int pos = inFASTA.tellg();
668 if ((pos == -1) || (pos >= line->end)) { break; }
670 if (inFASTA.eof()) { break; }
674 if((count) % 1000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
678 if((count) % 1000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
684 if (oligoFile != "") { outGroups.close(); }
685 if(qFileName != "") { qFile.close(); scrapQual.close(); outQual.close(); }
689 catch(exception& e) {
690 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
695 /**************************************************************************************************/
697 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector<string> fastaNames, vector<string> qualNames) {
699 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
704 //loop through and create all the processes you want
705 while (process != processors) {
709 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
712 for (int i = 0; i < fastaNames.size(); i++) {
713 fastaNames[i] = (fastaNames[i] + toString(getpid()) + ".temp");
714 //clear old file if it exists
716 m->openOutputFile(fastaNames[i], temp);
719 qualNames[i] = (qualNames[i] + toString(getpid()) + ".temp");
720 //clear old file if it exists
722 m->openOutputFile(qualNames[i], temp2);
727 driverCreateTrim(filename, qFileName, (trimFile + toString(getpid()) + ".temp"), (scrapFile + toString(getpid()) + ".temp"), (trimQFile + toString(getpid()) + ".temp"), (scrapQFile + toString(getpid()) + ".temp"), (groupFile + toString(getpid()) + ".temp"), fastaNames, qualNames, lines[process], qLines[process]);
730 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
731 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
737 for (int i = 0; i < fastaNames.size(); i++) {
738 //clear old file if it exists
740 m->openOutputFile(fastaNames[i], temp);
743 //clear old file if it exists
745 m->openOutputFile(qualNames[i], temp2);
750 driverCreateTrim(filename, qFileName, trimFile, scrapFile, trimQFile, scrapQFile, groupFile, fastaNames, qualNames, lines[0], qLines[0]);
753 //force parent to wait until all the processes are done
754 for (int i=0;i<processIDS.size();i++) {
755 int temp = processIDS[i];
760 for(int i=0;i<processIDS.size();i++){
762 m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
764 m->appendFiles((trimFile + toString(processIDS[i]) + ".temp"), trimFile);
765 remove((trimFile + toString(processIDS[i]) + ".temp").c_str());
766 m->appendFiles((scrapFile + toString(processIDS[i]) + ".temp"), scrapFile);
767 remove((scrapFile + toString(processIDS[i]) + ".temp").c_str());
769 m->mothurOut("Done with fasta files"); m->mothurOutEndLine();
772 m->appendFiles((trimQFile + toString(processIDS[i]) + ".temp"), trimQFile);
773 remove((trimQFile + toString(processIDS[i]) + ".temp").c_str());
774 m->appendFiles((scrapQFile + toString(processIDS[i]) + ".temp"), scrapQFile);
775 remove((scrapQFile + toString(processIDS[i]) + ".temp").c_str());
777 m->mothurOut("Done with quality files"); m->mothurOutEndLine();
780 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
781 remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
783 m->mothurOut("Done with group file"); m->mothurOutEndLine();
785 for (int j = 0; j < fastaNames.size(); j++) {
786 m->appendFiles((fastaNames[j] + toString(processIDS[i]) + ".temp"), fastaNames[j]);
787 remove((fastaNames[j] + toString(processIDS[i]) + ".temp").c_str());
791 for (int j = 0; j < qualNames.size(); j++) {
792 m->appendFiles((qualNames[j] + toString(processIDS[i]) + ".temp"), qualNames[j]);
793 remove((qualNames[j] + toString(processIDS[i]) + ".temp").c_str());
797 if (allFiles) { m->mothurOut("Done with allfiles"); m->mothurOutEndLine(); }
803 catch(exception& e) {
804 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
809 /**************************************************************************************************/
811 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
814 //set file positions for fasta file
815 fastaFilePos = m->divideFile(filename, processors);
817 if (qfilename == "") { return processors; }
819 //get name of first sequence in each chunk
820 map<string, int> firstSeqNames;
821 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
823 m->openInputFile(filename, in);
824 in.seekg(fastaFilePos[i]);
827 firstSeqNames[temp.getName()] = i;
832 //seach for filePos of each first name in the qfile and save in qfileFilePos
834 m->openInputFile(qfilename, inQual);
837 while(!inQual.eof()){
838 input = m->getline(inQual);
840 if (input.length() != 0) {
841 if(input[0] == '>'){ //this is a sequence name line
842 istringstream nameStream(input);
844 string sname = ""; nameStream >> sname;
845 sname = sname.substr(1);
847 map<string, int>::iterator it = firstSeqNames.find(sname);
849 if(it != firstSeqNames.end()) { //this is the start of a new chunk
850 unsigned long int pos = inQual.tellg();
851 qfileFilePos.push_back(pos - input.length() - 1);
852 firstSeqNames.erase(it);
857 if (firstSeqNames.size() == 0) { break; }
862 if (firstSeqNames.size() != 0) {
863 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
864 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
870 //get last file position of qfile
872 unsigned long int size;
874 //get num bytes in file
875 pFile = fopen (qfilename.c_str(),"rb");
876 if (pFile==NULL) perror ("Error opening file");
878 fseek (pFile, 0, SEEK_END);
883 qfileFilePos.push_back(size);
887 catch(exception& e) {
888 m->errorOut(e, "TrimSeqsCommand", "setLines");
893 //***************************************************************************************************************
895 void TrimSeqsCommand::getOligos(vector<string>& outFASTAVec, vector<string>& outQualVec){
898 m->openInputFile(oligoFile, inOligos);
902 string type, oligo, group;
904 //int indexPrimer = 0;
906 while(!inOligos.eof()){
907 inOligos >> type; m->gobble(inOligos);
910 while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
913 //make type case insensitive
914 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
918 for(int i=0;i<oligo.length();i++){
919 oligo[i] = toupper(oligo[i]);
920 if(oligo[i] == 'U') { oligo[i] = 'T'; }
923 if(type == "FORWARD"){
926 // get rest of line in case there is a primer name
927 while (!inOligos.eof()) {
928 char c = inOligos.get();
929 if (c == 10 || c == 13){ break; }
930 else if (c == 32 || c == 9){;} //space or tab
934 //check for repeat barcodes
935 map<string, int>::iterator itPrime = primers.find(oligo);
936 if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
938 primers[oligo]=index; index++;
939 groupVector.push_back(group);
942 outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
944 outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
946 if (group == "") { //if there is not a group for this primer, then this file will not get written to, but we add it to keep the indexes correct
947 filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
949 filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
952 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
953 outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
955 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
956 outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
962 else if(type == "REVERSE"){
963 Sequence oligoRC("reverse", oligo);
964 oligoRC.reverseComplement();
965 revPrimer.push_back(oligoRC.getUnaligned());
967 else if(type == "BARCODE"){
970 //check for repeat barcodes
971 map<string, int>::iterator itBar = barcodes.find(oligo);
972 if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
974 barcodes[oligo]=index; index++;
975 groupVector.push_back(group);
978 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
979 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
980 outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
982 outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
983 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
984 outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
988 }else{ m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
995 //add in potential combos
997 comboStarts = outFASTAVec.size()-1;
998 for (map<string, int>::iterator itBar = barcodes.begin(); itBar != barcodes.end(); itBar++) {
999 for (map<string, int>::iterator itPrime = primers.begin(); itPrime != primers.end(); itPrime++) {
1000 if (groupVector[itPrime->second] != "") { //there is a group for this primer
1001 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
1002 outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
1003 outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
1004 combos[(groupVector[itBar->second] + "." + groupVector[itPrime->second])] = outFASTAVec.size()-1;
1006 if(qFileName != ""){
1007 outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
1008 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
1009 outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
1016 numFPrimers = primers.size();
1017 numRPrimers = revPrimer.size();
1020 catch(exception& e) {
1021 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1025 //***************************************************************************************************************
1027 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
1030 string rawSequence = seq.getUnaligned();
1031 int success = bdiffs + 1; //guilty until proven innocent
1033 //can you find the barcode
1034 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1035 string oligo = it->first;
1036 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
1037 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
1041 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1043 seq.setUnaligned(rawSequence.substr(oligo.length()));
1045 if(qual.getName() != ""){
1046 qual.trimQScores(oligo.length(), -1);
1054 //if you found the barcode or if you don't want to allow for diffs
1056 if ((bdiffs == 0) || (success == 0)) { return success; }
1058 else { //try aligning and see if you can find it
1063 Alignment* alignment;
1064 if (barcodes.size() > 0) {
1065 map<string,int>::iterator it=barcodes.begin();
1067 for(it;it!=barcodes.end();it++){
1068 if(it->first.length() > maxLength){
1069 maxLength = it->first.length();
1072 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));
1074 }else{ alignment = NULL; }
1076 //can you find the barcode
1082 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1083 string oligo = it->first;
1084 // int length = oligo.length();
1086 if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
1087 success = bdiffs + 10;
1091 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1092 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1093 oligo = alignment->getSeqAAln();
1094 string temp = alignment->getSeqBAln();
1096 int alnLength = oligo.length();
1098 for(int i=oligo.length()-1;i>=0;i--){
1099 if(oligo[i] != '-'){ alnLength = i+1; break; }
1101 oligo = oligo.substr(0,alnLength);
1102 temp = temp.substr(0,alnLength);
1104 int numDiff = countDiffs(oligo, temp);
1106 // cout << oligo << '\t' << temp << '\t' << numDiff << endl;
1108 if(numDiff < minDiff){
1111 minGroup = it->second;
1113 for(int i=0;i<alnLength;i++){
1119 else if(numDiff == minDiff){
1125 if(minDiff > bdiffs) { success = minDiff; } //no good matches
1126 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
1127 else{ //use the best match
1129 seq.setUnaligned(rawSequence.substr(minPos));
1131 if(qual.getName() != ""){
1132 qual.trimQScores(minPos, -1);
1137 if (alignment != NULL) { delete alignment; }
1140 // cout << success << endl;
1145 catch(exception& e) {
1146 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
1152 //***************************************************************************************************************
1154 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
1156 string rawSequence = seq.getUnaligned();
1157 int success = pdiffs + 1; //guilty until proven innocent
1159 //can you find the primer
1160 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1161 string oligo = it->first;
1162 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
1163 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1167 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1169 seq.setUnaligned(rawSequence.substr(oligo.length()));
1170 if(qual.getName() != ""){
1171 qual.trimQScores(oligo.length(), -1);
1178 //if you found the barcode or if you don't want to allow for diffs
1180 if ((pdiffs == 0) || (success == 0)) { return success; }
1182 else { //try aligning and see if you can find it
1187 Alignment* alignment;
1188 if (primers.size() > 0) {
1189 map<string,int>::iterator it=primers.begin();
1191 for(it;it!=primers.end();it++){
1192 if(it->first.length() > maxLength){
1193 maxLength = it->first.length();
1196 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));
1198 }else{ alignment = NULL; }
1200 //can you find the barcode
1206 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1207 string oligo = it->first;
1208 // int length = oligo.length();
1210 if(rawSequence.length() < maxLength){
1211 success = pdiffs + 100;
1215 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1216 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1217 oligo = alignment->getSeqAAln();
1218 string temp = alignment->getSeqBAln();
1220 int alnLength = oligo.length();
1222 for(int i=oligo.length()-1;i>=0;i--){
1223 if(oligo[i] != '-'){ alnLength = i+1; break; }
1225 oligo = oligo.substr(0,alnLength);
1226 temp = temp.substr(0,alnLength);
1228 int numDiff = countDiffs(oligo, temp);
1230 // cout << oligo << '\t' << temp << '\t' << numDiff << endl;
1232 if(numDiff < minDiff){
1235 minGroup = it->second;
1237 for(int i=0;i<alnLength;i++){
1243 else if(numDiff == minDiff){
1249 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1250 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
1251 else{ //use the best match
1253 seq.setUnaligned(rawSequence.substr(minPos));
1254 if(qual.getName() != ""){
1255 qual.trimQScores(minPos, -1);
1260 if (alignment != NULL) { delete alignment; }
1267 catch(exception& e) {
1268 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1273 //***************************************************************************************************************
1275 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1277 string rawSequence = seq.getUnaligned();
1278 bool success = 0; //guilty until proven innocent
1280 for(int i=0;i<numRPrimers;i++){
1281 string oligo = revPrimer[i];
1283 if(rawSequence.length() < oligo.length()){
1288 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1289 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1290 if(qual.getName() != ""){
1291 qual.trimQScores(-1, rawSequence.length()-oligo.length());
1300 catch(exception& e) {
1301 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1306 //***************************************************************************************************************
1308 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1311 if(qscores.getName() != ""){
1312 qscores.trimQScores(-1, keepFirst);
1314 sequence.trim(keepFirst);
1317 catch(exception& e) {
1318 m->errorOut(e, "keepFirstTrim", "countDiffs");
1324 //***************************************************************************************************************
1326 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1330 int length = sequence.getNumBases() - removeLast;
1333 if(qscores.getName() != ""){
1334 qscores.trimQScores(-1, length);
1336 sequence.trim(length);
1345 catch(exception& e) {
1346 m->errorOut(e, "removeLastTrim", "countDiffs");
1352 //***************************************************************************************************************
1354 bool TrimSeqsCommand::cullLength(Sequence& seq){
1357 int length = seq.getNumBases();
1358 bool success = 0; //guilty until proven innocent
1360 if(length >= minLength && maxLength == 0) { success = 1; }
1361 else if(length >= minLength && length <= maxLength) { success = 1; }
1362 else { success = 0; }
1367 catch(exception& e) {
1368 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1374 //***************************************************************************************************************
1376 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1378 int longHomoP = seq.getLongHomoPolymer();
1379 bool success = 0; //guilty until proven innocent
1381 if(longHomoP <= maxHomoP){ success = 1; }
1382 else { success = 0; }
1386 catch(exception& e) {
1387 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1393 //***************************************************************************************************************
1395 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1397 int numNs = seq.getAmbigBases();
1398 bool success = 0; //guilty until proven innocent
1400 if(numNs <= maxAmbig) { success = 1; }
1401 else { success = 0; }
1405 catch(exception& e) {
1406 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1412 //***************************************************************************************************************
1414 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1417 int length = oligo.length();
1419 for(int i=0;i<length;i++){
1421 if(oligo[i] != seq[i]){
1422 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C') { success = 0; }
1423 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { success = 0; }
1424 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { success = 0; }
1425 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { success = 0; }
1426 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { success = 0; }
1427 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1428 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { success = 0; }
1429 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
1430 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1431 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1432 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { success = 0; }
1433 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
1435 if(success == 0) { break; }
1444 catch(exception& e) {
1445 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1451 //***************************************************************************************************************
1453 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1456 int length = oligo.length();
1459 for(int i=0;i<length;i++){
1461 if(oligo[i] != seq[i]){
1462 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.') { countDiffs++; }
1463 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { countDiffs++; }
1464 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { countDiffs++; }
1465 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { countDiffs++; }
1466 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { countDiffs++; }
1467 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1468 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { countDiffs++; }
1469 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
1470 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1471 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1472 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { countDiffs++; }
1473 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
1480 catch(exception& e) {
1481 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1487 //***************************************************************************************************************