5 * Created by Pat Schloss on 6/6/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
13 //**********************************************************************************************************************
14 vector<string> TrimSeqsCommand::setParameters(){
16 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
17 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
18 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
19 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
20 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
21 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
22 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
23 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
24 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
25 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
26 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
27 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
28 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
29 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
30 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
31 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
32 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
33 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
34 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
35 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
36 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
37 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
38 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
39 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
41 vector<string> myArray;
42 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
46 m->errorOut(e, "TrimSeqsCommand", "setParameters");
50 //**********************************************************************************************************************
51 string TrimSeqsCommand::getHelpString(){
53 string helpString = "";
54 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
55 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
56 helpString += "The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
57 helpString += "The fasta parameter is required.\n";
58 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
59 helpString += "The oligos parameter allows you to provide an oligos file.\n";
60 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
61 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
62 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
63 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
64 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n";
65 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
66 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
67 helpString += "The qfile parameter allows you to provide a quality file.\n";
68 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
69 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
70 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
71 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
72 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
73 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
74 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
75 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
76 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
77 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
78 helpString += "The trim.seqs command should be in the following format: \n";
79 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n";
80 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n";
81 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
82 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
83 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n";
87 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
93 //**********************************************************************************************************************
95 TrimSeqsCommand::TrimSeqsCommand(){
97 abort = true; calledHelp = true;
99 vector<string> tempOutNames;
100 outputTypes["fasta"] = tempOutNames;
101 outputTypes["qfile"] = tempOutNames;
102 outputTypes["group"] = tempOutNames;
104 catch(exception& e) {
105 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
109 //***************************************************************************************************************
111 TrimSeqsCommand::TrimSeqsCommand(string option) {
114 abort = false; calledHelp = false;
117 //allow user to run help
118 if(option == "help") { help(); abort = true; calledHelp = true; }
121 vector<string> myArray = setParameters();
123 OptionParser parser(option);
124 map<string,string> parameters = parser.getParameters();
126 ValidParameters validParameter;
127 map<string,string>::iterator it;
129 //check to make sure all parameters are valid for command
130 for (it = parameters.begin(); it != parameters.end(); it++) {
131 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
134 //initialize outputTypes
135 vector<string> tempOutNames;
136 outputTypes["fasta"] = tempOutNames;
137 outputTypes["qfile"] = tempOutNames;
138 outputTypes["group"] = tempOutNames;
140 //if the user changes the input directory command factory will send this info to us in the output parameter
141 string inputDir = validParameter.validFile(parameters, "inputdir", false);
142 if (inputDir == "not found"){ inputDir = ""; }
145 it = parameters.find("fasta");
146 //user has given a template file
147 if(it != parameters.end()){
148 path = m->hasPath(it->second);
149 //if the user has not given a path then, add inputdir. else leave path alone.
150 if (path == "") { parameters["fasta"] = inputDir + it->second; }
153 it = parameters.find("oligos");
154 //user has given a template file
155 if(it != parameters.end()){
156 path = m->hasPath(it->second);
157 //if the user has not given a path then, add inputdir. else leave path alone.
158 if (path == "") { parameters["oligos"] = inputDir + it->second; }
161 it = parameters.find("qfile");
162 //user has given a template file
163 if(it != parameters.end()){
164 path = m->hasPath(it->second);
165 //if the user has not given a path then, add inputdir. else leave path alone.
166 if (path == "") { parameters["qfile"] = inputDir + it->second; }
172 //check for required parameters
173 fastaFile = validParameter.validFile(parameters, "fasta", true);
174 if (fastaFile == "not found") {
175 fastaFile = m->getFastaFile();
176 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
177 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
178 }else if (fastaFile == "not open") { abort = true; }
180 //if the user changes the output directory command factory will send this info to us in the output parameter
181 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
183 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it
187 //check for optional parameter and set defaults
188 // ...at some point should added some additional type checking...
190 temp = validParameter.validFile(parameters, "flip", false);
191 if (temp == "not found"){ flip = 0; }
192 else if(m->isTrue(temp)) { flip = 1; }
194 temp = validParameter.validFile(parameters, "oligos", true);
195 if (temp == "not found"){ oligoFile = ""; }
196 else if(temp == "not open"){ abort = true; }
197 else { oligoFile = temp; }
200 temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
201 convert(temp, maxAmbig);
203 temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "0"; }
204 convert(temp, maxHomoP);
206 temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "0"; }
207 convert(temp, minLength);
209 temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; }
210 convert(temp, maxLength);
212 temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
213 convert(temp, bdiffs);
215 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
216 convert(temp, pdiffs);
218 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs; temp = toString(tempTotal); }
219 convert(temp, tdiffs);
221 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs; }
223 temp = validParameter.validFile(parameters, "qfile", true);
224 if (temp == "not found") { qFileName = ""; }
225 else if(temp == "not open") { abort = true; }
226 else { qFileName = temp; }
228 temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; }
229 convert(temp, qThreshold);
231 temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "t"; }
232 qtrim = m->isTrue(temp);
234 temp = validParameter.validFile(parameters, "rollaverage", false); if (temp == "not found") { temp = "0"; }
235 convert(temp, qRollAverage);
237 temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
238 convert(temp, qWindowAverage);
240 temp = validParameter.validFile(parameters, "qwindowsize", false); if (temp == "not found") { temp = "50"; }
241 convert(temp, qWindowSize);
243 temp = validParameter.validFile(parameters, "qstepsize", false); if (temp == "not found") { temp = "1"; }
244 convert(temp, qWindowStep);
246 temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; }
247 convert(temp, qAverage);
249 temp = validParameter.validFile(parameters, "keepfirst", false); if (temp == "not found") { temp = "0"; }
250 convert(temp, keepFirst);
252 temp = validParameter.validFile(parameters, "removelast", false); if (temp == "not found") { temp = "0"; }
253 convert(temp, removeLast);
255 temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
256 allFiles = m->isTrue(temp);
258 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
259 m->setProcessors(temp);
260 convert(temp, processors);
263 if(allFiles && (oligoFile == "")){
264 m->mothurOut("You selected allfiles, but didn't enter an oligos. Ignoring the allfiles request."); m->mothurOutEndLine();
266 if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
267 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
271 if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){
272 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
278 catch(exception& e) {
279 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
283 //***************************************************************************************************************
285 int TrimSeqsCommand::execute(){
288 if (abort == true) { if (calledHelp) { return 0; } return 2; }
290 numFPrimers = 0; //this needs to be initialized
292 vector<vector<string> > fastaFileNames;
293 vector<vector<string> > qualFileNames;
295 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
296 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
298 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
299 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
301 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
302 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
303 if (qFileName != "") {
304 outputNames.push_back(trimQualFile);
305 outputNames.push_back(scrapQualFile);
306 outputTypes["qfile"].push_back(trimQualFile);
307 outputTypes["qfile"].push_back(scrapQualFile);
310 string outputGroupFileName;
312 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
313 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
314 getOligos(fastaFileNames, qualFileNames);
317 vector<unsigned long int> fastaFilePos;
318 vector<unsigned long int> qFilePos;
320 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
322 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
323 lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
324 if (qFileName != "") { qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)])); }
326 if(qFileName == "") { qLines = lines; } //files with duds
328 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
330 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames, lines[0], qLines[0]);
332 createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames);
335 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames, lines[0], qLines[0]);
338 if (m->control_pressed) { return 0; }
341 map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
342 map<string, string>::iterator it;
343 set<string> namesToRemove;
344 for(int i=0;i<fastaFileNames.size();i++){
345 for(int j=0;j<fastaFileNames[0].size();j++){
346 if (fastaFileNames[i][j] != "") {
347 if(m->isBlank(fastaFileNames[i][j])){
348 remove(fastaFileNames[i][j].c_str());
349 namesToRemove.insert(fastaFileNames[i][j]);
352 remove(qualFileNames[i][j].c_str());
353 namesToRemove.insert(qualFileNames[i][j]);
356 it = uniqueFastaNames.find(fastaFileNames[i][j]);
357 if (it == uniqueFastaNames.end()) {
358 uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];
365 //remove names for outputFileNames, just cleans up the output
366 vector<string> outputNames2;
367 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
368 outputNames = outputNames2;
370 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
372 m->openInputFile(it->first, in);
375 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
376 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
377 m->openOutputFile(thisGroupName, out);
380 if (m->control_pressed) { break; }
382 Sequence currSeq(in); m->gobble(in);
383 out << currSeq.getName() << '\t' << it->second << endl;
390 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
392 //output group counts
393 m->mothurOutEndLine();
395 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
396 total += it->second; m->mothurOut("Group " + it->first + " contains " + toString(it->second) + " sequences."); m->mothurOutEndLine();
398 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
400 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
402 //set fasta file as new current fastafile
404 itTypes = outputTypes.find("fasta");
405 if (itTypes != outputTypes.end()) {
406 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
409 itTypes = outputTypes.find("qfile");
410 if (itTypes != outputTypes.end()) {
411 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
414 itTypes = outputTypes.find("group");
415 if (itTypes != outputTypes.end()) {
416 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
419 m->mothurOutEndLine();
420 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
421 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
422 m->mothurOutEndLine();
427 catch(exception& e) {
428 m->errorOut(e, "TrimSeqsCommand", "execute");
433 /**************************************************************************************/
435 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, linePair* line, linePair* qline) {
439 ofstream trimFASTAFile;
440 m->openOutputFile(trimFileName, trimFASTAFile);
442 ofstream scrapFASTAFile;
443 m->openOutputFile(scrapFileName, scrapFASTAFile);
445 ofstream trimQualFile;
446 ofstream scrapQualFile;
448 m->openOutputFile(trimQFileName, trimQualFile);
449 m->openOutputFile(scrapQFileName, scrapQualFile);
452 ofstream outGroupsFile;
453 if (oligoFile != ""){ m->openOutputFile(groupFileName, outGroupsFile); }
455 for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
456 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
457 if (fastaFileNames[i][j] != "") {
459 m->openOutputFile(fastaFileNames[i][j], temp); temp.close();
461 m->openOutputFile(qualFileNames[i][j], temp); temp.close();
469 m->openInputFile(filename, inFASTA);
470 inFASTA.seekg(line->start);
473 if(qFileName != "") {
474 m->openInputFile(qFileName, qFile);
475 qFile.seekg(qline->start);
483 if (m->control_pressed) {
484 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
485 if (oligoFile != "") { outGroupsFile.close(); }
490 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
496 string trashCode = "";
497 int currentSeqsDiffs = 0;
499 Sequence currSeq(inFASTA); m->gobble(inFASTA);
501 QualityScores currQual;
503 currQual = QualityScores(qFile); m->gobble(qFile);
506 string origSeq = currSeq.getUnaligned();
509 int barcodeIndex = 0;
512 if(barcodes.size() != 0){
513 success = stripBarcode(currSeq, currQual, barcodeIndex);
514 if(success > bdiffs) { trashCode += 'b'; }
515 else{ currentSeqsDiffs += success; }
518 if(numFPrimers != 0){
519 success = stripForward(currSeq, currQual, primerIndex);
520 if(success > pdiffs) { trashCode += 'f'; }
521 else{ currentSeqsDiffs += success; }
524 if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
526 if(numRPrimers != 0){
527 success = stripReverse(currSeq, currQual);
528 if(!success) { trashCode += 'r'; }
532 success = keepFirstTrim(currSeq, currQual);
536 success = removeLastTrim(currSeq, currQual);
537 if(!success) { trashCode += 'l'; }
542 int origLength = currSeq.getNumBases();
544 if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); }
545 else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); }
546 else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); }
547 else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); }
548 else { success = 1; }
550 //you don't want to trim, if it fails above then scrap it
551 if ((!qtrim) && (origLength != currSeq.getNumBases())) { success = 0; }
553 if(!success) { trashCode += 'q'; }
556 if(minLength > 0 || maxLength > 0){
557 success = cullLength(currSeq);
558 if(!success) { trashCode += 'l'; }
561 success = cullHomoP(currSeq);
562 if(!success) { trashCode += 'h'; }
565 success = cullAmbigs(currSeq);
566 if(!success) { trashCode += 'n'; }
569 if(flip){ // should go last
570 currSeq.reverseComplement();
572 currQual.flipQScores();
576 if(trashCode.length() == 0){
577 currSeq.setAligned(currSeq.getUnaligned());
578 currSeq.printSequence(trimFASTAFile);
581 currQual.printQScores(trimQualFile);
584 if(barcodes.size() != 0){
585 string thisGroup = barcodeNameVector[barcodeIndex];
586 if (primers.size() != 0) { if (primerNameVector[primerIndex] != "") { thisGroup += "." + primerNameVector[primerIndex]; } }
588 outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
590 map<string, int>::iterator it = groupCounts.find(thisGroup);
591 if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; }
592 else { groupCounts[it->first]++; }
599 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
600 currSeq.printSequence(output);
604 m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
605 currQual.printQScores(output);
611 currSeq.setName(currSeq.getName() + '|' + trashCode);
612 currSeq.setUnaligned(origSeq);
613 currSeq.setAligned(origSeq);
614 currSeq.printSequence(scrapFASTAFile);
616 currQual.printQScores(scrapQualFile);
622 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
623 unsigned long int pos = inFASTA.tellg();
624 if ((pos == -1) || (pos >= line->end)) { break; }
626 if (inFASTA.eof()) { break; }
630 if((count) % 1000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
634 if((count) % 1000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
638 trimFASTAFile.close();
639 scrapFASTAFile.close();
640 if (oligoFile != "") { outGroupsFile.close(); }
641 if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
645 catch(exception& e) {
646 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
651 /**************************************************************************************************/
653 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames) {
655 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
660 //loop through and create all the processes you want
661 while (process != processors) {
665 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
669 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
670 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
675 for(int i=0;i<tempFASTAFileNames.size();i++){
676 for(int j=0;j<tempFASTAFileNames[i].size();j++){
677 if (tempFASTAFileNames[i][j] != "") {
678 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
679 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
682 tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
683 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
690 driverCreateTrim(filename,
692 (trimFASTAFileName + toString(getpid()) + ".temp"),
693 (scrapFASTAFileName + toString(getpid()) + ".temp"),
694 (trimQualFileName + toString(getpid()) + ".temp"),
695 (scrapQualFileName + toString(getpid()) + ".temp"),
696 (groupFile + toString(getpid()) + ".temp"),
698 tempPrimerQualFileNames,
702 //pass groupCounts to parent
704 string tempFile = filename + toString(getpid()) + ".num.temp";
705 m->openOutputFile(tempFile, out);
706 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
707 out << it->first << '\t' << it->second << endl;
713 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
714 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
721 m->openOutputFile(trimFASTAFileName, temp); temp.close();
722 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
723 m->openOutputFile(trimQualFileName, temp); temp.close();
724 m->openOutputFile(scrapQualFileName, temp); temp.close();
726 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
728 //force parent to wait until all the processes are done
729 for (int i=0;i<processIDS.size();i++) {
730 int temp = processIDS[i];
735 for(int i=0;i<processIDS.size();i++){
737 m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
739 m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
740 remove((trimFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
741 m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
742 remove((scrapFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
745 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
746 remove((trimQualFileName + toString(processIDS[i]) + ".temp").c_str());
747 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
748 remove((scrapQualFileName + toString(processIDS[i]) + ".temp").c_str());
751 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
752 remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
756 for(int j=0;j<fastaFileNames.size();j++){
757 for(int k=0;k<fastaFileNames[j].size();k++){
758 if (fastaFileNames[j][k] != "") {
759 m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
760 remove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
763 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
764 remove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
772 string tempFile = filename + toString(processIDS[i]) + ".num.temp";
773 m->openInputFile(tempFile, in);
777 in >> group >> tempNum; m->gobble(in);
779 map<string, int>::iterator it = groupCounts.find(group);
780 if (it == groupCounts.end()) { groupCounts[group] = tempNum; }
781 else { groupCounts[it->first] += tempNum; }
783 in.close(); remove(tempFile.c_str());
790 catch(exception& e) {
791 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
796 /**************************************************************************************************/
798 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
801 //set file positions for fasta file
802 fastaFilePos = m->divideFile(filename, processors);
804 if (qfilename == "") { return processors; }
806 //get name of first sequence in each chunk
807 map<string, int> firstSeqNames;
808 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
810 m->openInputFile(filename, in);
811 in.seekg(fastaFilePos[i]);
814 firstSeqNames[temp.getName()] = i;
819 //seach for filePos of each first name in the qfile and save in qfileFilePos
821 m->openInputFile(qfilename, inQual);
824 while(!inQual.eof()){
825 input = m->getline(inQual);
827 if (input.length() != 0) {
828 if(input[0] == '>'){ //this is a sequence name line
829 istringstream nameStream(input);
831 string sname = ""; nameStream >> sname;
832 sname = sname.substr(1);
834 map<string, int>::iterator it = firstSeqNames.find(sname);
836 if(it != firstSeqNames.end()) { //this is the start of a new chunk
837 unsigned long int pos = inQual.tellg();
838 qfileFilePos.push_back(pos - input.length() - 1);
839 firstSeqNames.erase(it);
844 if (firstSeqNames.size() == 0) { break; }
849 if (firstSeqNames.size() != 0) {
850 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
851 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
857 //get last file position of qfile
859 unsigned long int size;
861 //get num bytes in file
862 pFile = fopen (qfilename.c_str(),"rb");
863 if (pFile==NULL) perror ("Error opening file");
865 fseek (pFile, 0, SEEK_END);
870 qfileFilePos.push_back(size);
874 catch(exception& e) {
875 m->errorOut(e, "TrimSeqsCommand", "setLines");
880 //***************************************************************************************************************
882 void TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames){
885 m->openInputFile(oligoFile, inOligos);
889 string type, oligo, group;
892 int indexBarcode = 0;
894 while(!inOligos.eof()){
896 inOligos >> type; m->gobble(inOligos);
899 while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
902 //make type case insensitive
903 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
907 for(int i=0;i<oligo.length();i++){
908 oligo[i] = toupper(oligo[i]);
909 if(oligo[i] == 'U') { oligo[i] = 'T'; }
912 if(type == "FORWARD"){
915 // get rest of line in case there is a primer name
916 while (!inOligos.eof()) {
917 char c = inOligos.get();
918 if (c == 10 || c == 13){ break; }
919 else if (c == 32 || c == 9){;} //space or tab
923 //check for repeat barcodes
924 map<string, int>::iterator itPrime = primers.find(oligo);
925 if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
927 primers[oligo]=indexPrimer; indexPrimer++;
928 primerNameVector.push_back(group);
930 else if(type == "REVERSE"){
931 Sequence oligoRC("reverse", oligo);
932 oligoRC.reverseComplement();
933 revPrimer.push_back(oligoRC.getUnaligned());
935 else if(type == "BARCODE"){
938 //check for repeat barcodes
939 map<string, int>::iterator itBar = barcodes.find(oligo);
940 if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
942 barcodes[oligo]=indexBarcode; indexBarcode++;
943 barcodeNameVector.push_back(group);
945 else{ m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
951 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; }
953 //add in potential combos
954 if(barcodeNameVector.size() == 0){
956 barcodeNameVector.push_back("");
959 if(primerNameVector.size() == 0){
961 primerNameVector.push_back("");
964 fastaFileNames.resize(barcodeNameVector.size());
965 for(int i=0;i<fastaFileNames.size();i++){
966 fastaFileNames[i].assign(primerNameVector.size(), "");
968 if(qFileName != ""){ qualFileNames = fastaFileNames; }
971 set<string> uniqueNames; //used to cleanup outputFileNames
972 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
973 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
975 string primerName = primerNameVector[itPrimer->second];
976 string barcodeName = barcodeNameVector[itBar->second];
978 string comboGroupName = "";
979 string fastaFileName = "";
980 string qualFileName = "";
982 if(primerName == ""){
983 comboGroupName = barcodeNameVector[itBar->second];
986 if(barcodeName == ""){
987 comboGroupName = primerNameVector[itPrimer->second];
990 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
995 fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
996 if (uniqueNames.count(fastaFileName) == 0) {
997 outputNames.push_back(fastaFileName);
998 outputTypes["fasta"].push_back(fastaFileName);
999 uniqueNames.insert(fastaFileName);
1002 fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1003 m->openOutputFile(fastaFileName, temp); temp.close();
1005 if(qFileName != ""){
1006 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1007 if (uniqueNames.count(fastaFileName) == 0) {
1008 outputNames.push_back(qualFileName);
1009 outputTypes["qfile"].push_back(qualFileName);
1012 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1013 m->openOutputFile(qualFileName, temp); temp.close();
1018 numFPrimers = primers.size();
1019 numRPrimers = revPrimer.size();
1022 catch(exception& e) {
1023 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1028 //***************************************************************************************************************
1030 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
1033 string rawSequence = seq.getUnaligned();
1034 int success = bdiffs + 1; //guilty until proven innocent
1036 //can you find the barcode
1037 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1038 string oligo = it->first;
1039 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
1040 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
1044 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1046 seq.setUnaligned(rawSequence.substr(oligo.length()));
1048 if(qual.getName() != ""){
1049 qual.trimQScores(oligo.length(), -1);
1057 //if you found the barcode or if you don't want to allow for diffs
1058 if ((bdiffs == 0) || (success == 0)) { return success; }
1060 else { //try aligning and see if you can find it
1064 Alignment* alignment;
1065 if (barcodes.size() > 0) {
1066 map<string,int>::iterator it=barcodes.begin();
1068 for(it;it!=barcodes.end();it++){
1069 if(it->first.length() > maxLength){
1070 maxLength = it->first.length();
1073 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));
1075 }else{ alignment = NULL; }
1077 //can you find the barcode
1083 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1084 string oligo = it->first;
1085 // int length = oligo.length();
1087 if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
1088 success = bdiffs + 10;
1092 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1093 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1094 oligo = alignment->getSeqAAln();
1095 string temp = alignment->getSeqBAln();
1097 int alnLength = oligo.length();
1099 for(int i=oligo.length()-1;i>=0;i--){
1100 if(oligo[i] != '-'){ alnLength = i+1; break; }
1102 oligo = oligo.substr(0,alnLength);
1103 temp = temp.substr(0,alnLength);
1105 int numDiff = countDiffs(oligo, temp);
1107 if(numDiff < minDiff){
1110 minGroup = it->second;
1112 for(int i=0;i<alnLength;i++){
1118 else if(numDiff == minDiff){
1124 if(minDiff > bdiffs) { success = minDiff; } //no good matches
1125 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
1126 else{ //use the best match
1128 seq.setUnaligned(rawSequence.substr(minPos));
1130 if(qual.getName() != ""){
1131 qual.trimQScores(minPos, -1);
1136 if (alignment != NULL) { delete alignment; }
1143 catch(exception& e) {
1144 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
1150 //***************************************************************************************************************
1152 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
1154 string rawSequence = seq.getUnaligned();
1155 int success = pdiffs + 1; //guilty until proven innocent
1157 //can you find the primer
1158 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1159 string oligo = it->first;
1160 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
1161 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1165 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1167 seq.setUnaligned(rawSequence.substr(oligo.length()));
1168 if(qual.getName() != ""){
1169 qual.trimQScores(oligo.length(), -1);
1176 //if you found the barcode or if you don't want to allow for diffs
1177 if ((pdiffs == 0) || (success == 0)) { return success; }
1179 else { //try aligning and see if you can find it
1183 Alignment* alignment;
1184 if (primers.size() > 0) {
1185 map<string,int>::iterator it=primers.begin();
1187 for(it;it!=primers.end();it++){
1188 if(it->first.length() > maxLength){
1189 maxLength = it->first.length();
1192 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));
1194 }else{ alignment = NULL; }
1196 //can you find the barcode
1202 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1203 string oligo = it->first;
1204 // int length = oligo.length();
1206 if(rawSequence.length() < maxLength){
1207 success = pdiffs + 100;
1211 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1212 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1213 oligo = alignment->getSeqAAln();
1214 string temp = alignment->getSeqBAln();
1216 int alnLength = oligo.length();
1218 for(int i=oligo.length()-1;i>=0;i--){
1219 if(oligo[i] != '-'){ alnLength = i+1; break; }
1221 oligo = oligo.substr(0,alnLength);
1222 temp = temp.substr(0,alnLength);
1224 int numDiff = countDiffs(oligo, temp);
1226 if(numDiff < minDiff){
1229 minGroup = it->second;
1231 for(int i=0;i<alnLength;i++){
1237 else if(numDiff == minDiff){
1243 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1244 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
1245 else{ //use the best match
1247 seq.setUnaligned(rawSequence.substr(minPos));
1248 if(qual.getName() != ""){
1249 qual.trimQScores(minPos, -1);
1254 if (alignment != NULL) { delete alignment; }
1261 catch(exception& e) {
1262 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1267 //***************************************************************************************************************
1269 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1271 string rawSequence = seq.getUnaligned();
1272 bool success = 0; //guilty until proven innocent
1274 for(int i=0;i<numRPrimers;i++){
1275 string oligo = revPrimer[i];
1277 if(rawSequence.length() < oligo.length()){
1282 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1283 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1284 if(qual.getName() != ""){
1285 qual.trimQScores(-1, rawSequence.length()-oligo.length());
1294 catch(exception& e) {
1295 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1300 //***************************************************************************************************************
1302 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1305 if(qscores.getName() != ""){
1306 qscores.trimQScores(-1, keepFirst);
1308 sequence.trim(keepFirst);
1311 catch(exception& e) {
1312 m->errorOut(e, "keepFirstTrim", "countDiffs");
1318 //***************************************************************************************************************
1320 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1324 int length = sequence.getNumBases() - removeLast;
1327 if(qscores.getName() != ""){
1328 qscores.trimQScores(-1, length);
1330 sequence.trim(length);
1339 catch(exception& e) {
1340 m->errorOut(e, "removeLastTrim", "countDiffs");
1346 //***************************************************************************************************************
1348 bool TrimSeqsCommand::cullLength(Sequence& seq){
1351 int length = seq.getNumBases();
1352 bool success = 0; //guilty until proven innocent
1354 if(length >= minLength && maxLength == 0) { success = 1; }
1355 else if(length >= minLength && length <= maxLength) { success = 1; }
1356 else { success = 0; }
1361 catch(exception& e) {
1362 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1368 //***************************************************************************************************************
1370 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1372 int longHomoP = seq.getLongHomoPolymer();
1373 bool success = 0; //guilty until proven innocent
1375 if(longHomoP <= maxHomoP){ success = 1; }
1376 else { success = 0; }
1380 catch(exception& e) {
1381 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1387 //***************************************************************************************************************
1389 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1391 int numNs = seq.getAmbigBases();
1392 bool success = 0; //guilty until proven innocent
1394 if(numNs <= maxAmbig) { success = 1; }
1395 else { success = 0; }
1399 catch(exception& e) {
1400 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1406 //***************************************************************************************************************
1408 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1411 int length = oligo.length();
1413 for(int i=0;i<length;i++){
1415 if(oligo[i] != seq[i]){
1416 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C') { success = 0; }
1417 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { success = 0; }
1418 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { success = 0; }
1419 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { success = 0; }
1420 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { success = 0; }
1421 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1422 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { success = 0; }
1423 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
1424 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1425 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1426 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { success = 0; }
1427 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
1429 if(success == 0) { break; }
1438 catch(exception& e) {
1439 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1445 //***************************************************************************************************************
1447 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1450 int length = oligo.length();
1453 for(int i=0;i<length;i++){
1455 if(oligo[i] != seq[i]){
1456 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.') { countDiffs++; }
1457 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { countDiffs++; }
1458 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { countDiffs++; }
1459 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { countDiffs++; }
1460 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { countDiffs++; }
1461 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1462 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { countDiffs++; }
1463 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
1464 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1465 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1466 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { countDiffs++; }
1467 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
1474 catch(exception& e) {
1475 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1481 //***************************************************************************************************************