5 * Created by Pat Schloss on 6/6/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
13 //**********************************************************************************************************************
14 vector<string> TrimSeqsCommand::setParameters(){
16 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
17 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
18 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
19 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
20 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
21 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
22 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
23 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
24 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
25 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
26 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
27 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
28 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
29 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
30 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
31 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
32 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
33 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
34 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
35 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
36 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
37 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
38 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
39 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
41 vector<string> myArray;
42 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
46 m->errorOut(e, "TrimSeqsCommand", "setParameters");
50 //**********************************************************************************************************************
51 string TrimSeqsCommand::getHelpString(){
53 string helpString = "";
54 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
55 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
56 helpString += "The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
57 helpString += "The fasta parameter is required.\n";
58 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
59 helpString += "The oligos parameter allows you to provide an oligos file.\n";
60 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
61 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
62 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
63 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
64 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n";
65 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
66 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
67 helpString += "The qfile parameter allows you to provide a quality file.\n";
68 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
69 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
70 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
71 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
72 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
73 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
74 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
75 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
76 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
77 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
78 helpString += "The trim.seqs command should be in the following format: \n";
79 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n";
80 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n";
81 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
82 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
83 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
87 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
93 //**********************************************************************************************************************
95 TrimSeqsCommand::TrimSeqsCommand(){
97 abort = true; calledHelp = true;
99 vector<string> tempOutNames;
100 outputTypes["fasta"] = tempOutNames;
101 outputTypes["qfile"] = tempOutNames;
102 outputTypes["group"] = tempOutNames;
104 catch(exception& e) {
105 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
109 //***************************************************************************************************************
111 TrimSeqsCommand::TrimSeqsCommand(string option) {
114 abort = false; calledHelp = false;
117 //allow user to run help
118 if(option == "help") { help(); abort = true; calledHelp = true; }
121 vector<string> myArray = setParameters();
123 OptionParser parser(option);
124 map<string,string> parameters = parser.getParameters();
126 ValidParameters validParameter;
127 map<string,string>::iterator it;
129 //check to make sure all parameters are valid for command
130 for (it = parameters.begin(); it != parameters.end(); it++) {
131 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
134 //initialize outputTypes
135 vector<string> tempOutNames;
136 outputTypes["fasta"] = tempOutNames;
137 outputTypes["qfile"] = tempOutNames;
138 outputTypes["group"] = tempOutNames;
140 //if the user changes the input directory command factory will send this info to us in the output parameter
141 string inputDir = validParameter.validFile(parameters, "inputdir", false);
142 if (inputDir == "not found"){ inputDir = ""; }
145 it = parameters.find("fasta");
146 //user has given a template file
147 if(it != parameters.end()){
148 path = m->hasPath(it->second);
149 //if the user has not given a path then, add inputdir. else leave path alone.
150 if (path == "") { parameters["fasta"] = inputDir + it->second; }
153 it = parameters.find("oligos");
154 //user has given a template file
155 if(it != parameters.end()){
156 path = m->hasPath(it->second);
157 //if the user has not given a path then, add inputdir. else leave path alone.
158 if (path == "") { parameters["oligos"] = inputDir + it->second; }
161 it = parameters.find("qfile");
162 //user has given a template file
163 if(it != parameters.end()){
164 path = m->hasPath(it->second);
165 //if the user has not given a path then, add inputdir. else leave path alone.
166 if (path == "") { parameters["qfile"] = inputDir + it->second; }
172 //check for required parameters
173 fastaFile = validParameter.validFile(parameters, "fasta", true);
174 if (fastaFile == "not found") {
175 fastaFile = m->getFastaFile();
176 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
177 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
178 }else if (fastaFile == "not open") { abort = true; }
180 //if the user changes the output directory command factory will send this info to us in the output parameter
181 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
183 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it
187 //check for optional parameter and set defaults
188 // ...at some point should added some additional type checking...
190 temp = validParameter.validFile(parameters, "flip", false);
191 if (temp == "not found"){ flip = 0; }
192 else if(m->isTrue(temp)) { flip = 1; }
194 temp = validParameter.validFile(parameters, "oligos", true);
195 if (temp == "not found"){ oligoFile = ""; }
196 else if(temp == "not open"){ abort = true; }
197 else { oligoFile = temp; }
200 temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
201 convert(temp, maxAmbig);
203 temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "0"; }
204 convert(temp, maxHomoP);
206 temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "0"; }
207 convert(temp, minLength);
209 temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; }
210 convert(temp, maxLength);
212 temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
213 convert(temp, bdiffs);
215 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
216 convert(temp, pdiffs);
218 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs; temp = toString(tempTotal); }
219 convert(temp, tdiffs);
221 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs; }
223 temp = validParameter.validFile(parameters, "qfile", true);
224 if (temp == "not found") { qFileName = ""; }
225 else if(temp == "not open") { abort = true; }
226 else { qFileName = temp; }
228 temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; }
229 convert(temp, qThreshold);
231 temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "t"; }
232 qtrim = m->isTrue(temp);
234 temp = validParameter.validFile(parameters, "rollaverage", false); if (temp == "not found") { temp = "0"; }
235 convert(temp, qRollAverage);
237 temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
238 convert(temp, qWindowAverage);
240 temp = validParameter.validFile(parameters, "qwindowsize", false); if (temp == "not found") { temp = "50"; }
241 convert(temp, qWindowSize);
243 temp = validParameter.validFile(parameters, "qstepsize", false); if (temp == "not found") { temp = "1"; }
244 convert(temp, qWindowStep);
246 temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; }
247 convert(temp, qAverage);
249 temp = validParameter.validFile(parameters, "keepfirst", false); if (temp == "not found") { temp = "0"; }
250 convert(temp, keepFirst);
252 temp = validParameter.validFile(parameters, "removelast", false); if (temp == "not found") { temp = "0"; }
253 convert(temp, removeLast);
255 temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
256 allFiles = m->isTrue(temp);
258 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
259 m->setProcessors(temp);
260 convert(temp, processors);
263 if(allFiles && (oligoFile == "")){
264 m->mothurOut("You selected allfiles, but didn't enter an oligos. Ignoring the allfiles request."); m->mothurOutEndLine();
266 if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
267 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
271 if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){
272 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
278 catch(exception& e) {
279 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
283 //***************************************************************************************************************
285 int TrimSeqsCommand::execute(){
288 if (abort == true) { if (calledHelp) { return 0; } return 2; }
290 numFPrimers = 0; //this needs to be initialized
292 vector<vector<string> > fastaFileNames;
293 vector<vector<string> > qualFileNames;
295 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
296 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
298 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
299 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
301 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
302 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
303 if (qFileName != "") {
304 outputNames.push_back(trimQualFile);
305 outputNames.push_back(scrapQualFile);
306 outputTypes["qfile"].push_back(trimQualFile);
307 outputTypes["qfile"].push_back(scrapQualFile);
310 string outputGroupFileName;
312 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
313 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
314 getOligos(fastaFileNames, qualFileNames);
317 vector<unsigned long int> fastaFilePos;
318 vector<unsigned long int> qFilePos;
320 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
322 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
323 lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
324 if (qFileName != "") { qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)])); }
326 if(qFileName == "") { qLines = lines; } //files with duds
328 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
330 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames, lines[0], qLines[0]);
332 createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames);
335 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames, lines[0], qLines[0]);
338 if (m->control_pressed) { return 0; }
341 map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
342 map<string, string>::iterator it;
343 set<string> namesToRemove;
344 for(int i=0;i<fastaFileNames.size();i++){
345 for(int j=0;j<fastaFileNames[0].size();j++){
346 if (fastaFileNames[i][j] != "") {
347 if(m->isBlank(fastaFileNames[i][j])){
348 remove(fastaFileNames[i][j].c_str());
349 namesToRemove.insert(fastaFileNames[i][j]);
352 remove(qualFileNames[i][j].c_str());
353 namesToRemove.insert(qualFileNames[i][j]);
356 it = uniqueFastaNames.find(fastaFileNames[i][j]);
357 if (it == uniqueFastaNames.end()) {
358 uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];
365 //remove names for outputFileNames, just cleans up the output
366 vector<string> outputNames2;
367 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
368 outputNames = outputNames2;
370 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
372 m->openInputFile(it->first, in);
375 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
376 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
377 m->openOutputFile(thisGroupName, out);
380 if (m->control_pressed) { break; }
382 Sequence currSeq(in); m->gobble(in);
383 out << currSeq.getName() << '\t' << it->second << endl;
390 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
392 //output group counts
393 m->mothurOutEndLine();
395 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
396 total += it->second; m->mothurOut("Group " + it->first + " contains " + toString(it->second) + " sequences."); m->mothurOutEndLine();
398 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
400 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
402 //set fasta file as new current fastafile
404 itTypes = outputTypes.find("fasta");
405 if (itTypes != outputTypes.end()) {
406 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
409 itTypes = outputTypes.find("qfile");
410 if (itTypes != outputTypes.end()) {
411 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
414 itTypes = outputTypes.find("group");
415 if (itTypes != outputTypes.end()) {
416 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
419 m->mothurOutEndLine();
420 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
421 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
422 m->mothurOutEndLine();
427 catch(exception& e) {
428 m->errorOut(e, "TrimSeqsCommand", "execute");
433 /**************************************************************************************/
435 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, linePair* line, linePair* qline) {
439 ofstream trimFASTAFile;
440 m->openOutputFile(trimFileName, trimFASTAFile);
442 ofstream scrapFASTAFile;
443 m->openOutputFile(scrapFileName, scrapFASTAFile);
445 ofstream trimQualFile;
446 ofstream scrapQualFile;
448 m->openOutputFile(trimQFileName, trimQualFile);
449 m->openOutputFile(scrapQFileName, scrapQualFile);
452 ofstream outGroupsFile;
453 if (oligoFile != ""){ m->openOutputFile(groupFileName, outGroupsFile); }
455 for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
456 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
457 if (fastaFileNames[i][j] != "") {
459 m->openOutputFile(fastaFileNames[i][j], temp); temp.close();
461 m->openOutputFile(qualFileNames[i][j], temp); temp.close();
469 m->openInputFile(filename, inFASTA);
470 inFASTA.seekg(line->start);
473 if(qFileName != "") {
474 m->openInputFile(qFileName, qFile);
475 qFile.seekg(qline->start);
483 if (m->control_pressed) {
484 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
485 if (oligoFile != "") { outGroupsFile.close(); }
490 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
496 string trashCode = "";
497 int currentSeqsDiffs = 0;
499 Sequence currSeq(inFASTA); m->gobble(inFASTA);
501 QualityScores currQual;
503 currQual = QualityScores(qFile); m->gobble(qFile);
506 string origSeq = currSeq.getUnaligned();
509 int barcodeIndex = 0;
512 if(barcodes.size() != 0){
513 success = stripBarcode(currSeq, currQual, barcodeIndex);
514 if(success > bdiffs) { trashCode += 'b'; }
515 else{ currentSeqsDiffs += success; }
518 if(numFPrimers != 0){
519 success = stripForward(currSeq, currQual, primerIndex);
520 if(success > pdiffs) { trashCode += 'f'; }
521 else{ currentSeqsDiffs += success; }
524 if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
526 if(numRPrimers != 0){
527 success = stripReverse(currSeq, currQual);
528 if(!success) { trashCode += 'r'; }
532 success = keepFirstTrim(currSeq, currQual);
536 success = removeLastTrim(currSeq, currQual);
537 if(!success) { trashCode += 'l'; }
542 int origLength = currSeq.getNumBases();
544 if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); }
545 else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); }
546 else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); }
547 else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); }
548 else { success = 1; }
550 //you don't want to trim, if it fails above then scrap it
551 if ((!qtrim) && (origLength != currSeq.getNumBases())) { success = 0; }
553 if(!success) { trashCode += 'q'; }
556 if(minLength > 0 || maxLength > 0){
557 success = cullLength(currSeq);
558 if(!success) { trashCode += 'l'; }
561 success = cullHomoP(currSeq);
562 if(!success) { trashCode += 'h'; }
565 success = cullAmbigs(currSeq);
566 if(!success) { trashCode += 'n'; }
569 if(flip){ // should go last
570 currSeq.reverseComplement();
572 currQual.flipQScores();
576 if(trashCode.length() == 0){
577 currSeq.setAligned(currSeq.getUnaligned());
578 currSeq.printSequence(trimFASTAFile);
581 currQual.printQScores(trimQualFile);
584 if(barcodes.size() != 0){
585 string thisGroup = barcodeNameVector[barcodeIndex];
586 if (primers.size() != 0) { if (primerNameVector[primerIndex] != "") { thisGroup += "." + primerNameVector[primerIndex]; } }
588 outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
590 map<string, int>::iterator it = groupCounts.find(thisGroup);
591 if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; }
592 else { groupCounts[it->first]++; }
599 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
600 currSeq.printSequence(output);
604 m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
605 currQual.printQScores(output);
611 currSeq.setName(currSeq.getName() + '|' + trashCode);
612 currSeq.setUnaligned(origSeq);
613 currSeq.setAligned(origSeq);
614 currSeq.printSequence(scrapFASTAFile);
616 currQual.printQScores(scrapQualFile);
622 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
623 unsigned long int pos = inFASTA.tellg();
624 if ((pos == -1) || (pos >= line->end)) { break; }
626 if (inFASTA.eof()) { break; }
630 if((count) % 1000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
634 if((count) % 1000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
638 trimFASTAFile.close();
639 scrapFASTAFile.close();
640 if (oligoFile != "") { outGroupsFile.close(); }
641 if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
645 catch(exception& e) {
646 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
651 /**************************************************************************************************/
653 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames) {
655 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
660 //loop through and create all the processes you want
661 while (process != processors) {
665 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
669 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
670 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
675 for(int i=0;i<tempFASTAFileNames.size();i++){
676 for(int j=0;j<tempFASTAFileNames[i].size();j++){
677 if (tempFASTAFileNames[i][j] != "") {
678 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
679 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
682 tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
683 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
690 driverCreateTrim(filename,
692 (trimFASTAFileName + toString(getpid()) + ".temp"),
693 (scrapFASTAFileName + toString(getpid()) + ".temp"),
694 (trimQualFileName + toString(getpid()) + ".temp"),
695 (scrapQualFileName + toString(getpid()) + ".temp"),
696 (groupFile + toString(getpid()) + ".temp"),
698 tempPrimerQualFileNames,
702 //pass groupCounts to parent
704 string tempFile = filename + toString(getpid()) + ".num.temp";
705 m->openOutputFile(tempFile, out);
706 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
707 out << it->first << '\t' << it->second << endl;
713 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
714 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
721 m->openOutputFile(trimFASTAFileName, temp); temp.close();
722 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
723 m->openOutputFile(trimQualFileName, temp); temp.close();
724 m->openOutputFile(scrapQualFileName, temp); temp.close();
726 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
728 //force parent to wait until all the processes are done
729 for (int i=0;i<processIDS.size();i++) {
730 int temp = processIDS[i];
735 for(int i=0;i<processIDS.size();i++){
737 m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
739 m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
740 remove((trimFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
741 m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
742 remove((scrapFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
745 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
746 remove((trimQualFileName + toString(processIDS[i]) + ".temp").c_str());
747 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
748 remove((scrapQualFileName + toString(processIDS[i]) + ".temp").c_str());
751 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
752 remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
756 for(int j=0;j<fastaFileNames.size();j++){
757 for(int k=0;k<fastaFileNames[j].size();k++){
758 if (fastaFileNames[j][k] != "") {
759 m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
760 remove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
763 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
764 remove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
772 string tempFile = filename + toString(processIDS[i]) + ".num.temp";
773 m->openInputFile(tempFile, in);
777 in >> group >> tempNum; m->gobble(in);
779 map<string, int>::iterator it = groupCounts.find(group);
780 if (it == groupCounts.end()) { groupCounts[group] = tempNum; }
781 else { groupCounts[it->first] += tempNum; }
783 in.close(); remove(tempFile.c_str());
790 catch(exception& e) {
791 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
796 /**************************************************************************************************/
798 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
801 //set file positions for fasta file
802 fastaFilePos = m->divideFile(filename, processors);
804 if (qfilename == "") { return processors; }
806 //get name of first sequence in each chunk
807 map<string, int> firstSeqNames;
808 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
810 m->openInputFile(filename, in);
811 in.seekg(fastaFilePos[i]);
814 firstSeqNames[temp.getName()] = i;
819 //seach for filePos of each first name in the qfile and save in qfileFilePos
821 m->openInputFile(qfilename, inQual);
824 while(!inQual.eof()){
825 input = m->getline(inQual);
827 if (input.length() != 0) {
828 if(input[0] == '>'){ //this is a sequence name line
829 istringstream nameStream(input);
831 string sname = ""; nameStream >> sname;
832 sname = sname.substr(1);
834 map<string, int>::iterator it = firstSeqNames.find(sname);
836 if(it != firstSeqNames.end()) { //this is the start of a new chunk
837 unsigned long int pos = inQual.tellg();
838 qfileFilePos.push_back(pos - input.length() - 1);
839 firstSeqNames.erase(it);
844 if (firstSeqNames.size() == 0) { break; }
849 if (firstSeqNames.size() != 0) {
850 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
851 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
857 //get last file position of qfile
859 unsigned long int size;
861 //get num bytes in file
862 pFile = fopen (qfilename.c_str(),"rb");
863 if (pFile==NULL) perror ("Error opening file");
865 fseek (pFile, 0, SEEK_END);
870 qfileFilePos.push_back(size);
874 catch(exception& e) {
875 m->errorOut(e, "TrimSeqsCommand", "setLines");
880 //***************************************************************************************************************
882 void TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames){
885 m->openInputFile(oligoFile, inOligos);
889 string type, oligo, group;
892 int indexBarcode = 0;
894 while(!inOligos.eof()){
899 while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
904 //make type case insensitive
905 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
909 for(int i=0;i<oligo.length();i++){
910 oligo[i] = toupper(oligo[i]);
911 if(oligo[i] == 'U') { oligo[i] = 'T'; }
914 if(type == "FORWARD"){
917 // get rest of line in case there is a primer name
918 while (!inOligos.eof()) {
919 char c = inOligos.get();
920 if (c == 10 || c == 13){ break; }
921 else if (c == 32 || c == 9){;} //space or tab
925 //check for repeat barcodes
926 map<string, int>::iterator itPrime = primers.find(oligo);
927 if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
929 primers[oligo]=indexPrimer; indexPrimer++;
930 primerNameVector.push_back(group);
932 else if(type == "REVERSE"){
933 Sequence oligoRC("reverse", oligo);
934 oligoRC.reverseComplement();
935 revPrimer.push_back(oligoRC.getUnaligned());
937 else if(type == "BARCODE"){
940 //check for repeat barcodes
941 map<string, int>::iterator itBar = barcodes.find(oligo);
942 if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
944 barcodes[oligo]=indexBarcode; indexBarcode++;
945 barcodeNameVector.push_back(group);
947 else{ m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
953 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; }
955 //add in potential combos
956 if(barcodeNameVector.size() == 0){
958 barcodeNameVector.push_back("");
961 if(primerNameVector.size() == 0){
963 primerNameVector.push_back("");
966 fastaFileNames.resize(barcodeNameVector.size());
967 for(int i=0;i<fastaFileNames.size();i++){
968 fastaFileNames[i].assign(primerNameVector.size(), "");
970 if(qFileName != ""){ qualFileNames = fastaFileNames; }
973 set<string> uniqueNames; //used to cleanup outputFileNames
974 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
975 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
977 string primerName = primerNameVector[itPrimer->second];
978 string barcodeName = barcodeNameVector[itBar->second];
980 string comboGroupName = "";
981 string fastaFileName = "";
982 string qualFileName = "";
984 if(primerName == ""){
985 comboGroupName = barcodeNameVector[itBar->second];
988 if(barcodeName == ""){
989 comboGroupName = primerNameVector[itPrimer->second];
992 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
997 fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
998 if (uniqueNames.count(fastaFileName) == 0) {
999 outputNames.push_back(fastaFileName);
1000 outputTypes["fasta"].push_back(fastaFileName);
1001 uniqueNames.insert(fastaFileName);
1004 fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1005 m->openOutputFile(fastaFileName, temp); temp.close();
1007 if(qFileName != ""){
1008 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1009 if (uniqueNames.count(fastaFileName) == 0) {
1010 outputNames.push_back(qualFileName);
1011 outputTypes["qfile"].push_back(qualFileName);
1014 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1015 m->openOutputFile(qualFileName, temp); temp.close();
1020 numFPrimers = primers.size();
1021 numRPrimers = revPrimer.size();
1024 catch(exception& e) {
1025 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1030 //***************************************************************************************************************
1032 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
1035 string rawSequence = seq.getUnaligned();
1036 int success = bdiffs + 1; //guilty until proven innocent
1038 //can you find the barcode
1039 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1040 string oligo = it->first;
1041 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
1042 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
1046 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1048 seq.setUnaligned(rawSequence.substr(oligo.length()));
1050 if(qual.getName() != ""){
1051 qual.trimQScores(oligo.length(), -1);
1059 //if you found the barcode or if you don't want to allow for diffs
1060 if ((bdiffs == 0) || (success == 0)) { return success; }
1062 else { //try aligning and see if you can find it
1066 Alignment* alignment;
1067 if (barcodes.size() > 0) {
1068 map<string,int>::iterator it=barcodes.begin();
1070 for(it;it!=barcodes.end();it++){
1071 if(it->first.length() > maxLength){
1072 maxLength = it->first.length();
1075 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));
1077 }else{ alignment = NULL; }
1079 //can you find the barcode
1085 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1086 string oligo = it->first;
1087 // int length = oligo.length();
1089 if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
1090 success = bdiffs + 10;
1094 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1095 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1096 oligo = alignment->getSeqAAln();
1097 string temp = alignment->getSeqBAln();
1099 int alnLength = oligo.length();
1101 for(int i=oligo.length()-1;i>=0;i--){
1102 if(oligo[i] != '-'){ alnLength = i+1; break; }
1104 oligo = oligo.substr(0,alnLength);
1105 temp = temp.substr(0,alnLength);
1107 int numDiff = countDiffs(oligo, temp);
1109 if(numDiff < minDiff){
1112 minGroup = it->second;
1114 for(int i=0;i<alnLength;i++){
1120 else if(numDiff == minDiff){
1126 if(minDiff > bdiffs) { success = minDiff; } //no good matches
1127 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
1128 else{ //use the best match
1130 seq.setUnaligned(rawSequence.substr(minPos));
1132 if(qual.getName() != ""){
1133 qual.trimQScores(minPos, -1);
1138 if (alignment != NULL) { delete alignment; }
1145 catch(exception& e) {
1146 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
1152 //***************************************************************************************************************
1154 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
1156 string rawSequence = seq.getUnaligned();
1157 int success = pdiffs + 1; //guilty until proven innocent
1159 //can you find the primer
1160 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1161 string oligo = it->first;
1162 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
1163 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1167 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1169 seq.setUnaligned(rawSequence.substr(oligo.length()));
1170 if(qual.getName() != ""){
1171 qual.trimQScores(oligo.length(), -1);
1178 //if you found the barcode or if you don't want to allow for diffs
1179 if ((pdiffs == 0) || (success == 0)) { return success; }
1181 else { //try aligning and see if you can find it
1185 Alignment* alignment;
1186 if (primers.size() > 0) {
1187 map<string,int>::iterator it=primers.begin();
1189 for(it;it!=primers.end();it++){
1190 if(it->first.length() > maxLength){
1191 maxLength = it->first.length();
1194 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));
1196 }else{ alignment = NULL; }
1198 //can you find the barcode
1204 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1205 string oligo = it->first;
1206 // int length = oligo.length();
1208 if(rawSequence.length() < maxLength){
1209 success = pdiffs + 100;
1213 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1214 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1215 oligo = alignment->getSeqAAln();
1216 string temp = alignment->getSeqBAln();
1218 int alnLength = oligo.length();
1220 for(int i=oligo.length()-1;i>=0;i--){
1221 if(oligo[i] != '-'){ alnLength = i+1; break; }
1223 oligo = oligo.substr(0,alnLength);
1224 temp = temp.substr(0,alnLength);
1226 int numDiff = countDiffs(oligo, temp);
1228 if(numDiff < minDiff){
1231 minGroup = it->second;
1233 for(int i=0;i<alnLength;i++){
1239 else if(numDiff == minDiff){
1245 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1246 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
1247 else{ //use the best match
1249 seq.setUnaligned(rawSequence.substr(minPos));
1250 if(qual.getName() != ""){
1251 qual.trimQScores(minPos, -1);
1256 if (alignment != NULL) { delete alignment; }
1263 catch(exception& e) {
1264 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1269 //***************************************************************************************************************
1271 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1273 string rawSequence = seq.getUnaligned();
1274 bool success = 0; //guilty until proven innocent
1276 for(int i=0;i<numRPrimers;i++){
1277 string oligo = revPrimer[i];
1279 if(rawSequence.length() < oligo.length()){
1284 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1285 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1286 if(qual.getName() != ""){
1287 qual.trimQScores(-1, rawSequence.length()-oligo.length());
1296 catch(exception& e) {
1297 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1302 //***************************************************************************************************************
1304 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1307 if(qscores.getName() != ""){
1308 qscores.trimQScores(-1, keepFirst);
1310 sequence.trim(keepFirst);
1313 catch(exception& e) {
1314 m->errorOut(e, "keepFirstTrim", "countDiffs");
1320 //***************************************************************************************************************
1322 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1326 int length = sequence.getNumBases() - removeLast;
1329 if(qscores.getName() != ""){
1330 qscores.trimQScores(-1, length);
1332 sequence.trim(length);
1341 catch(exception& e) {
1342 m->errorOut(e, "removeLastTrim", "countDiffs");
1348 //***************************************************************************************************************
1350 bool TrimSeqsCommand::cullLength(Sequence& seq){
1353 int length = seq.getNumBases();
1354 bool success = 0; //guilty until proven innocent
1356 if(length >= minLength && maxLength == 0) { success = 1; }
1357 else if(length >= minLength && length <= maxLength) { success = 1; }
1358 else { success = 0; }
1363 catch(exception& e) {
1364 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1370 //***************************************************************************************************************
1372 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1374 int longHomoP = seq.getLongHomoPolymer();
1375 bool success = 0; //guilty until proven innocent
1377 if(longHomoP <= maxHomoP){ success = 1; }
1378 else { success = 0; }
1382 catch(exception& e) {
1383 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1389 //***************************************************************************************************************
1391 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1393 int numNs = seq.getAmbigBases();
1394 bool success = 0; //guilty until proven innocent
1396 if(numNs <= maxAmbig) { success = 1; }
1397 else { success = 0; }
1401 catch(exception& e) {
1402 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1408 //***************************************************************************************************************
1410 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1413 int length = oligo.length();
1415 for(int i=0;i<length;i++){
1417 if(oligo[i] != seq[i]){
1418 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C') { success = 0; }
1419 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { success = 0; }
1420 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { success = 0; }
1421 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { success = 0; }
1422 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { success = 0; }
1423 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1424 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { success = 0; }
1425 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
1426 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1427 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1428 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { success = 0; }
1429 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
1431 if(success == 0) { break; }
1440 catch(exception& e) {
1441 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1447 //***************************************************************************************************************
1449 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1452 int length = oligo.length();
1455 for(int i=0;i<length;i++){
1457 if(oligo[i] != seq[i]){
1458 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.') { countDiffs++; }
1459 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { countDiffs++; }
1460 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { countDiffs++; }
1461 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { countDiffs++; }
1462 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { countDiffs++; }
1463 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1464 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { countDiffs++; }
1465 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
1466 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1467 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1468 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { countDiffs++; }
1469 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
1476 catch(exception& e) {
1477 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1483 //***************************************************************************************************************