5 * Created by Pat Schloss on 6/6/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
13 //**********************************************************************************************************************
14 vector<string> TrimSeqsCommand::setParameters(){
16 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
17 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
18 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
19 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
20 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
21 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
22 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
23 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
24 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
25 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
26 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
27 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
28 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
29 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
30 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
31 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
32 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
33 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
34 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
35 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
36 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
37 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
38 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
39 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
41 vector<string> myArray;
42 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
46 m->errorOut(e, "TrimSeqsCommand", "setParameters");
50 //**********************************************************************************************************************
51 string TrimSeqsCommand::getHelpString(){
53 string helpString = "";
54 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
55 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
56 helpString += "The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
57 helpString += "The fasta parameter is required.\n";
58 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
59 helpString += "The oligos parameter allows you to provide an oligos file.\n";
60 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
61 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
62 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
63 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
64 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n";
65 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
66 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
67 helpString += "The qfile parameter allows you to provide a quality file.\n";
68 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
69 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
70 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
71 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
72 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
73 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
74 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
75 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
76 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
77 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
78 helpString += "The trim.seqs command should be in the following format: \n";
79 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n";
80 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n";
81 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
82 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
83 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
87 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
93 //**********************************************************************************************************************
95 TrimSeqsCommand::TrimSeqsCommand(){
97 abort = true; calledHelp = true;
99 vector<string> tempOutNames;
100 outputTypes["fasta"] = tempOutNames;
101 outputTypes["qfile"] = tempOutNames;
102 outputTypes["group"] = tempOutNames;
104 catch(exception& e) {
105 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
109 //***************************************************************************************************************
111 TrimSeqsCommand::TrimSeqsCommand(string option) {
114 abort = false; calledHelp = false;
117 //allow user to run help
118 if(option == "help") { help(); abort = true; calledHelp = true; }
119 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
122 vector<string> myArray = setParameters();
124 OptionParser parser(option);
125 map<string,string> parameters = parser.getParameters();
127 ValidParameters validParameter;
128 map<string,string>::iterator it;
130 //check to make sure all parameters are valid for command
131 for (it = parameters.begin(); it != parameters.end(); it++) {
132 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
135 //initialize outputTypes
136 vector<string> tempOutNames;
137 outputTypes["fasta"] = tempOutNames;
138 outputTypes["qfile"] = tempOutNames;
139 outputTypes["group"] = tempOutNames;
141 //if the user changes the input directory command factory will send this info to us in the output parameter
142 string inputDir = validParameter.validFile(parameters, "inputdir", false);
143 if (inputDir == "not found"){ inputDir = ""; }
146 it = parameters.find("fasta");
147 //user has given a template file
148 if(it != parameters.end()){
149 path = m->hasPath(it->second);
150 //if the user has not given a path then, add inputdir. else leave path alone.
151 if (path == "") { parameters["fasta"] = inputDir + it->second; }
154 it = parameters.find("oligos");
155 //user has given a template file
156 if(it != parameters.end()){
157 path = m->hasPath(it->second);
158 //if the user has not given a path then, add inputdir. else leave path alone.
159 if (path == "") { parameters["oligos"] = inputDir + it->second; }
162 it = parameters.find("qfile");
163 //user has given a template file
164 if(it != parameters.end()){
165 path = m->hasPath(it->second);
166 //if the user has not given a path then, add inputdir. else leave path alone.
167 if (path == "") { parameters["qfile"] = inputDir + it->second; }
173 //check for required parameters
174 fastaFile = validParameter.validFile(parameters, "fasta", true);
175 if (fastaFile == "not found") {
176 fastaFile = m->getFastaFile();
177 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
178 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
179 }else if (fastaFile == "not open") { abort = true; }
181 //if the user changes the output directory command factory will send this info to us in the output parameter
182 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
184 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it
188 //check for optional parameter and set defaults
189 // ...at some point should added some additional type checking...
191 temp = validParameter.validFile(parameters, "flip", false);
192 if (temp == "not found"){ flip = 0; }
193 else if(m->isTrue(temp)) { flip = 1; }
195 temp = validParameter.validFile(parameters, "oligos", true);
196 if (temp == "not found"){ oligoFile = ""; }
197 else if(temp == "not open"){ abort = true; }
198 else { oligoFile = temp; }
201 temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
202 convert(temp, maxAmbig);
204 temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "0"; }
205 convert(temp, maxHomoP);
207 temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "0"; }
208 convert(temp, minLength);
210 temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; }
211 convert(temp, maxLength);
213 temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
214 convert(temp, bdiffs);
216 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
217 convert(temp, pdiffs);
219 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs; temp = toString(tempTotal); }
220 convert(temp, tdiffs);
222 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs; }
224 temp = validParameter.validFile(parameters, "qfile", true);
225 if (temp == "not found") { qFileName = ""; }
226 else if(temp == "not open") { abort = true; }
227 else { qFileName = temp; }
229 temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; }
230 convert(temp, qThreshold);
232 temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "t"; }
233 qtrim = m->isTrue(temp);
235 temp = validParameter.validFile(parameters, "rollaverage", false); if (temp == "not found") { temp = "0"; }
236 convert(temp, qRollAverage);
238 temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
239 convert(temp, qWindowAverage);
241 temp = validParameter.validFile(parameters, "qwindowsize", false); if (temp == "not found") { temp = "50"; }
242 convert(temp, qWindowSize);
244 temp = validParameter.validFile(parameters, "qstepsize", false); if (temp == "not found") { temp = "1"; }
245 convert(temp, qWindowStep);
247 temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; }
248 convert(temp, qAverage);
250 temp = validParameter.validFile(parameters, "keepfirst", false); if (temp == "not found") { temp = "0"; }
251 convert(temp, keepFirst);
253 temp = validParameter.validFile(parameters, "removelast", false); if (temp == "not found") { temp = "0"; }
254 convert(temp, removeLast);
256 temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
257 allFiles = m->isTrue(temp);
259 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
260 m->setProcessors(temp);
261 convert(temp, processors);
264 if(allFiles && (oligoFile == "")){
265 m->mothurOut("You selected allfiles, but didn't enter an oligos. Ignoring the allfiles request."); m->mothurOutEndLine();
267 if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
268 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
272 if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){
273 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
279 catch(exception& e) {
280 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
284 //***************************************************************************************************************
286 int TrimSeqsCommand::execute(){
289 if (abort == true) { if (calledHelp) { return 0; } return 2; }
291 numFPrimers = 0; //this needs to be initialized
293 vector<vector<string> > fastaFileNames;
294 vector<vector<string> > qualFileNames;
296 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
297 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
299 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
300 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
302 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
303 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
304 if (qFileName != "") {
305 outputNames.push_back(trimQualFile);
306 outputNames.push_back(scrapQualFile);
307 outputTypes["qfile"].push_back(trimQualFile);
308 outputTypes["qfile"].push_back(scrapQualFile);
311 string outputGroupFileName;
313 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
314 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
315 getOligos(fastaFileNames, qualFileNames);
318 vector<unsigned long int> fastaFilePos;
319 vector<unsigned long int> qFilePos;
321 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
323 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
324 lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
325 if (qFileName != "") { qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)])); }
327 if(qFileName == "") { qLines = lines; } //files with duds
329 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
331 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames, lines[0], qLines[0]);
333 createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames);
336 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames, lines[0], qLines[0]);
339 if (m->control_pressed) { return 0; }
342 map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
343 map<string, string>::iterator it;
344 set<string> namesToRemove;
345 for(int i=0;i<fastaFileNames.size();i++){
346 for(int j=0;j<fastaFileNames[0].size();j++){
347 if (fastaFileNames[i][j] != "") {
348 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
349 if(m->isBlank(fastaFileNames[i][j])){
350 remove(fastaFileNames[i][j].c_str());
351 namesToRemove.insert(fastaFileNames[i][j]);
354 remove(qualFileNames[i][j].c_str());
355 namesToRemove.insert(qualFileNames[i][j]);
358 it = uniqueFastaNames.find(fastaFileNames[i][j]);
359 if (it == uniqueFastaNames.end()) {
360 uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];
368 //remove names for outputFileNames, just cleans up the output
369 vector<string> outputNames2;
370 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
371 outputNames = outputNames2;
373 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
375 m->openInputFile(it->first, in);
378 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
379 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
380 m->openOutputFile(thisGroupName, out);
383 if (m->control_pressed) { break; }
385 Sequence currSeq(in); m->gobble(in);
386 out << currSeq.getName() << '\t' << it->second << endl;
393 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
395 //output group counts
396 m->mothurOutEndLine();
398 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
399 total += it->second; m->mothurOut("Group " + it->first + " contains " + toString(it->second) + " sequences."); m->mothurOutEndLine();
401 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
403 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
405 //set fasta file as new current fastafile
407 itTypes = outputTypes.find("fasta");
408 if (itTypes != outputTypes.end()) {
409 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
412 itTypes = outputTypes.find("qfile");
413 if (itTypes != outputTypes.end()) {
414 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
417 itTypes = outputTypes.find("group");
418 if (itTypes != outputTypes.end()) {
419 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
422 m->mothurOutEndLine();
423 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
424 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
425 m->mothurOutEndLine();
430 catch(exception& e) {
431 m->errorOut(e, "TrimSeqsCommand", "execute");
436 /**************************************************************************************/
438 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, linePair* line, linePair* qline) {
442 ofstream trimFASTAFile;
443 m->openOutputFile(trimFileName, trimFASTAFile);
445 ofstream scrapFASTAFile;
446 m->openOutputFile(scrapFileName, scrapFASTAFile);
448 ofstream trimQualFile;
449 ofstream scrapQualFile;
451 m->openOutputFile(trimQFileName, trimQualFile);
452 m->openOutputFile(scrapQFileName, scrapQualFile);
455 ofstream outGroupsFile;
456 if (oligoFile != ""){ m->openOutputFile(groupFileName, outGroupsFile); }
458 for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
459 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
460 if (fastaFileNames[i][j] != "") {
462 m->openOutputFile(fastaFileNames[i][j], temp); temp.close();
464 m->openOutputFile(qualFileNames[i][j], temp); temp.close();
472 m->openInputFile(filename, inFASTA);
473 inFASTA.seekg(line->start);
476 if(qFileName != "") {
477 m->openInputFile(qFileName, qFile);
478 qFile.seekg(qline->start);
486 if (m->control_pressed) {
487 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
488 if (oligoFile != "") { outGroupsFile.close(); }
493 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
499 string trashCode = "";
500 int currentSeqsDiffs = 0;
502 Sequence currSeq(inFASTA); m->gobble(inFASTA);
504 QualityScores currQual;
506 currQual = QualityScores(qFile); m->gobble(qFile);
509 string origSeq = currSeq.getUnaligned();
512 int barcodeIndex = 0;
515 if(barcodes.size() != 0){
516 success = stripBarcode(currSeq, currQual, barcodeIndex);
517 if(success > bdiffs) { trashCode += 'b'; }
518 else{ currentSeqsDiffs += success; }
521 if(numFPrimers != 0){
522 success = stripForward(currSeq, currQual, primerIndex);
523 if(success > pdiffs) { trashCode += 'f'; }
524 else{ currentSeqsDiffs += success; }
527 if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
529 if(numRPrimers != 0){
530 success = stripReverse(currSeq, currQual);
531 if(!success) { trashCode += 'r'; }
535 success = keepFirstTrim(currSeq, currQual);
539 success = removeLastTrim(currSeq, currQual);
540 if(!success) { trashCode += 'l'; }
545 int origLength = currSeq.getNumBases();
547 if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); }
548 else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); }
549 else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); }
550 else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); }
551 else { success = 1; }
553 //you don't want to trim, if it fails above then scrap it
554 if ((!qtrim) && (origLength != currSeq.getNumBases())) { success = 0; }
556 if(!success) { trashCode += 'q'; }
559 if(minLength > 0 || maxLength > 0){
560 success = cullLength(currSeq);
561 if(!success) { trashCode += 'l'; }
564 success = cullHomoP(currSeq);
565 if(!success) { trashCode += 'h'; }
568 success = cullAmbigs(currSeq);
569 if(!success) { trashCode += 'n'; }
572 if(flip){ // should go last
573 currSeq.reverseComplement();
575 currQual.flipQScores();
579 if(trashCode.length() == 0){
580 currSeq.setAligned(currSeq.getUnaligned());
581 currSeq.printSequence(trimFASTAFile);
584 currQual.printQScores(trimQualFile);
587 if(barcodes.size() != 0){
588 string thisGroup = barcodeNameVector[barcodeIndex];
589 if (primers.size() != 0) { if (primerNameVector[primerIndex] != "") { thisGroup += "." + primerNameVector[primerIndex]; } }
591 outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
593 map<string, int>::iterator it = groupCounts.find(thisGroup);
594 if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; }
595 else { groupCounts[it->first]++; }
602 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
603 currSeq.printSequence(output);
607 m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
608 currQual.printQScores(output);
614 currSeq.setName(currSeq.getName() + '|' + trashCode);
615 currSeq.setUnaligned(origSeq);
616 currSeq.setAligned(origSeq);
617 currSeq.printSequence(scrapFASTAFile);
619 currQual.printQScores(scrapQualFile);
625 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
626 unsigned long int pos = inFASTA.tellg();
627 if ((pos == -1) || (pos >= line->end)) { break; }
629 if (inFASTA.eof()) { break; }
633 if((count) % 1000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
637 if((count) % 1000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
641 trimFASTAFile.close();
642 scrapFASTAFile.close();
643 if (oligoFile != "") { outGroupsFile.close(); }
644 if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
648 catch(exception& e) {
649 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
654 /**************************************************************************************************/
656 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames) {
658 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
663 //loop through and create all the processes you want
664 while (process != processors) {
668 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
672 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
673 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
678 for(int i=0;i<tempFASTAFileNames.size();i++){
679 for(int j=0;j<tempFASTAFileNames[i].size();j++){
680 if (tempFASTAFileNames[i][j] != "") {
681 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
682 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
685 tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
686 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
693 driverCreateTrim(filename,
695 (trimFASTAFileName + toString(getpid()) + ".temp"),
696 (scrapFASTAFileName + toString(getpid()) + ".temp"),
697 (trimQualFileName + toString(getpid()) + ".temp"),
698 (scrapQualFileName + toString(getpid()) + ".temp"),
699 (groupFile + toString(getpid()) + ".temp"),
701 tempPrimerQualFileNames,
705 //pass groupCounts to parent
707 string tempFile = filename + toString(getpid()) + ".num.temp";
708 m->openOutputFile(tempFile, out);
709 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
710 out << it->first << '\t' << it->second << endl;
716 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
717 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
724 m->openOutputFile(trimFASTAFileName, temp); temp.close();
725 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
726 m->openOutputFile(trimQualFileName, temp); temp.close();
727 m->openOutputFile(scrapQualFileName, temp); temp.close();
729 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
731 //force parent to wait until all the processes are done
732 for (int i=0;i<processIDS.size();i++) {
733 int temp = processIDS[i];
738 for(int i=0;i<processIDS.size();i++){
740 m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
742 m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
743 remove((trimFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
744 m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
745 remove((scrapFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
748 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
749 remove((trimQualFileName + toString(processIDS[i]) + ".temp").c_str());
750 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
751 remove((scrapQualFileName + toString(processIDS[i]) + ".temp").c_str());
754 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
755 remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
759 for(int j=0;j<fastaFileNames.size();j++){
760 for(int k=0;k<fastaFileNames[j].size();k++){
761 if (fastaFileNames[j][k] != "") {
762 m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
763 remove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
766 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
767 remove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
775 string tempFile = filename + toString(processIDS[i]) + ".num.temp";
776 m->openInputFile(tempFile, in);
780 in >> group >> tempNum; m->gobble(in);
782 map<string, int>::iterator it = groupCounts.find(group);
783 if (it == groupCounts.end()) { groupCounts[group] = tempNum; }
784 else { groupCounts[it->first] += tempNum; }
786 in.close(); remove(tempFile.c_str());
793 catch(exception& e) {
794 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
799 /**************************************************************************************************/
801 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
803 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
804 //set file positions for fasta file
805 fastaFilePos = m->divideFile(filename, processors);
807 if (qfilename == "") { return processors; }
809 //get name of first sequence in each chunk
810 map<string, int> firstSeqNames;
811 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
813 m->openInputFile(filename, in);
814 in.seekg(fastaFilePos[i]);
817 firstSeqNames[temp.getName()] = i;
822 //seach for filePos of each first name in the qfile and save in qfileFilePos
824 m->openInputFile(qfilename, inQual);
827 while(!inQual.eof()){
828 input = m->getline(inQual);
830 if (input.length() != 0) {
831 if(input[0] == '>'){ //this is a sequence name line
832 istringstream nameStream(input);
834 string sname = ""; nameStream >> sname;
835 sname = sname.substr(1);
837 map<string, int>::iterator it = firstSeqNames.find(sname);
839 if(it != firstSeqNames.end()) { //this is the start of a new chunk
840 unsigned long int pos = inQual.tellg();
841 qfileFilePos.push_back(pos - input.length() - 1);
842 firstSeqNames.erase(it);
847 if (firstSeqNames.size() == 0) { break; }
852 if (firstSeqNames.size() != 0) {
853 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
854 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
860 //get last file position of qfile
862 unsigned long int size;
864 //get num bytes in file
865 pFile = fopen (qfilename.c_str(),"rb");
866 if (pFile==NULL) perror ("Error opening file");
868 fseek (pFile, 0, SEEK_END);
873 qfileFilePos.push_back(size);
879 fastaFilePos.push_back(0); qfileFilePos.push_back(0);
880 //get last file position of fastafile
882 unsigned long int size;
884 //get num bytes in file
885 pFile = fopen (filename.c_str(),"rb");
886 if (pFile==NULL) perror ("Error opening file");
888 fseek (pFile, 0, SEEK_END);
892 fastaFilePos.push_back(size);
894 //get last file position of fastafile
897 //get num bytes in file
898 qFile = fopen (qfilename.c_str(),"rb");
899 if (qFile==NULL) perror ("Error opening file");
901 fseek (qFile, 0, SEEK_END);
905 qfileFilePos.push_back(size);
911 catch(exception& e) {
912 m->errorOut(e, "TrimSeqsCommand", "setLines");
917 //***************************************************************************************************************
919 void TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames){
922 m->openInputFile(oligoFile, inOligos);
926 string type, oligo, group;
929 int indexBarcode = 0;
931 while(!inOligos.eof()){
936 while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
941 //make type case insensitive
942 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
946 for(int i=0;i<oligo.length();i++){
947 oligo[i] = toupper(oligo[i]);
948 if(oligo[i] == 'U') { oligo[i] = 'T'; }
951 if(type == "FORWARD"){
954 // get rest of line in case there is a primer name
955 while (!inOligos.eof()) {
956 char c = inOligos.get();
957 if (c == 10 || c == 13){ break; }
958 else if (c == 32 || c == 9){;} //space or tab
962 //check for repeat barcodes
963 map<string, int>::iterator itPrime = primers.find(oligo);
964 if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
966 primers[oligo]=indexPrimer; indexPrimer++;
967 primerNameVector.push_back(group);
969 else if(type == "REVERSE"){
970 Sequence oligoRC("reverse", oligo);
971 oligoRC.reverseComplement();
972 revPrimer.push_back(oligoRC.getUnaligned());
974 else if(type == "BARCODE"){
977 //check for repeat barcodes
978 map<string, int>::iterator itBar = barcodes.find(oligo);
979 if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
981 barcodes[oligo]=indexBarcode; indexBarcode++;
982 barcodeNameVector.push_back(group);
984 else{ m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
990 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; }
992 //add in potential combos
993 if(barcodeNameVector.size() == 0){
995 barcodeNameVector.push_back("");
998 if(primerNameVector.size() == 0){
1000 primerNameVector.push_back("");
1003 fastaFileNames.resize(barcodeNameVector.size());
1004 for(int i=0;i<fastaFileNames.size();i++){
1005 fastaFileNames[i].assign(primerNameVector.size(), "");
1007 if(qFileName != ""){ qualFileNames = fastaFileNames; }
1010 set<string> uniqueNames; //used to cleanup outputFileNames
1011 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1012 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1014 string primerName = primerNameVector[itPrimer->second];
1015 string barcodeName = barcodeNameVector[itBar->second];
1017 string comboGroupName = "";
1018 string fastaFileName = "";
1019 string qualFileName = "";
1021 if(primerName == ""){
1022 comboGroupName = barcodeNameVector[itBar->second];
1025 if(barcodeName == ""){
1026 comboGroupName = primerNameVector[itPrimer->second];
1029 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1033 if ((comboGroupName == "") || (comboGroupName == ".")) {}
1036 fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1037 if (uniqueNames.count(fastaFileName) == 0) {
1038 outputNames.push_back(fastaFileName);
1039 outputTypes["fasta"].push_back(fastaFileName);
1040 uniqueNames.insert(fastaFileName);
1043 fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1044 m->openOutputFile(fastaFileName, temp); temp.close();
1046 if(qFileName != ""){
1047 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1048 if (uniqueNames.count(fastaFileName) == 0) {
1049 outputNames.push_back(qualFileName);
1050 outputTypes["qfile"].push_back(qualFileName);
1053 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1054 m->openOutputFile(qualFileName, temp); temp.close();
1060 numFPrimers = primers.size();
1061 numRPrimers = revPrimer.size();
1064 catch(exception& e) {
1065 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1070 //***************************************************************************************************************
1072 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
1075 string rawSequence = seq.getUnaligned();
1076 int success = bdiffs + 1; //guilty until proven innocent
1078 //can you find the barcode
1079 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1080 string oligo = it->first;
1081 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
1082 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
1086 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1088 seq.setUnaligned(rawSequence.substr(oligo.length()));
1090 if(qual.getName() != ""){
1091 qual.trimQScores(oligo.length(), -1);
1099 //if you found the barcode or if you don't want to allow for diffs
1100 if ((bdiffs == 0) || (success == 0)) { return success; }
1102 else { //try aligning and see if you can find it
1106 Alignment* alignment;
1107 if (barcodes.size() > 0) {
1108 map<string,int>::iterator it=barcodes.begin();
1110 for(it;it!=barcodes.end();it++){
1111 if(it->first.length() > maxLength){
1112 maxLength = it->first.length();
1115 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));
1117 }else{ alignment = NULL; }
1119 //can you find the barcode
1125 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1126 string oligo = it->first;
1127 // int length = oligo.length();
1129 if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
1130 success = bdiffs + 10;
1134 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1135 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1136 oligo = alignment->getSeqAAln();
1137 string temp = alignment->getSeqBAln();
1139 int alnLength = oligo.length();
1141 for(int i=oligo.length()-1;i>=0;i--){
1142 if(oligo[i] != '-'){ alnLength = i+1; break; }
1144 oligo = oligo.substr(0,alnLength);
1145 temp = temp.substr(0,alnLength);
1147 int numDiff = countDiffs(oligo, temp);
1149 if(numDiff < minDiff){
1152 minGroup = it->second;
1154 for(int i=0;i<alnLength;i++){
1160 else if(numDiff == minDiff){
1166 if(minDiff > bdiffs) { success = minDiff; } //no good matches
1167 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
1168 else{ //use the best match
1170 seq.setUnaligned(rawSequence.substr(minPos));
1172 if(qual.getName() != ""){
1173 qual.trimQScores(minPos, -1);
1178 if (alignment != NULL) { delete alignment; }
1185 catch(exception& e) {
1186 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
1192 //***************************************************************************************************************
1194 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
1196 string rawSequence = seq.getUnaligned();
1197 int success = pdiffs + 1; //guilty until proven innocent
1199 //can you find the primer
1200 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1201 string oligo = it->first;
1202 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
1203 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1207 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1209 seq.setUnaligned(rawSequence.substr(oligo.length()));
1210 if(qual.getName() != ""){
1211 qual.trimQScores(oligo.length(), -1);
1218 //if you found the barcode or if you don't want to allow for diffs
1219 if ((pdiffs == 0) || (success == 0)) { return success; }
1221 else { //try aligning and see if you can find it
1225 Alignment* alignment;
1226 if (primers.size() > 0) {
1227 map<string,int>::iterator it=primers.begin();
1229 for(it;it!=primers.end();it++){
1230 if(it->first.length() > maxLength){
1231 maxLength = it->first.length();
1234 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));
1236 }else{ alignment = NULL; }
1238 //can you find the barcode
1244 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1245 string oligo = it->first;
1246 // int length = oligo.length();
1248 if(rawSequence.length() < maxLength){
1249 success = pdiffs + 100;
1253 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1254 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1255 oligo = alignment->getSeqAAln();
1256 string temp = alignment->getSeqBAln();
1258 int alnLength = oligo.length();
1260 for(int i=oligo.length()-1;i>=0;i--){
1261 if(oligo[i] != '-'){ alnLength = i+1; break; }
1263 oligo = oligo.substr(0,alnLength);
1264 temp = temp.substr(0,alnLength);
1266 int numDiff = countDiffs(oligo, temp);
1268 if(numDiff < minDiff){
1271 minGroup = it->second;
1273 for(int i=0;i<alnLength;i++){
1279 else if(numDiff == minDiff){
1285 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1286 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
1287 else{ //use the best match
1289 seq.setUnaligned(rawSequence.substr(minPos));
1290 if(qual.getName() != ""){
1291 qual.trimQScores(minPos, -1);
1296 if (alignment != NULL) { delete alignment; }
1303 catch(exception& e) {
1304 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1309 //***************************************************************************************************************
1311 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1313 string rawSequence = seq.getUnaligned();
1314 bool success = 0; //guilty until proven innocent
1316 for(int i=0;i<numRPrimers;i++){
1317 string oligo = revPrimer[i];
1319 if(rawSequence.length() < oligo.length()){
1324 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1325 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1326 if(qual.getName() != ""){
1327 qual.trimQScores(-1, rawSequence.length()-oligo.length());
1336 catch(exception& e) {
1337 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1342 //***************************************************************************************************************
1344 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1347 if(qscores.getName() != ""){
1348 qscores.trimQScores(-1, keepFirst);
1350 sequence.trim(keepFirst);
1353 catch(exception& e) {
1354 m->errorOut(e, "keepFirstTrim", "countDiffs");
1360 //***************************************************************************************************************
1362 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1366 int length = sequence.getNumBases() - removeLast;
1369 if(qscores.getName() != ""){
1370 qscores.trimQScores(-1, length);
1372 sequence.trim(length);
1381 catch(exception& e) {
1382 m->errorOut(e, "removeLastTrim", "countDiffs");
1388 //***************************************************************************************************************
1390 bool TrimSeqsCommand::cullLength(Sequence& seq){
1393 int length = seq.getNumBases();
1394 bool success = 0; //guilty until proven innocent
1396 if(length >= minLength && maxLength == 0) { success = 1; }
1397 else if(length >= minLength && length <= maxLength) { success = 1; }
1398 else { success = 0; }
1403 catch(exception& e) {
1404 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1410 //***************************************************************************************************************
1412 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1414 int longHomoP = seq.getLongHomoPolymer();
1415 bool success = 0; //guilty until proven innocent
1417 if(longHomoP <= maxHomoP){ success = 1; }
1418 else { success = 0; }
1422 catch(exception& e) {
1423 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1429 //***************************************************************************************************************
1431 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1433 int numNs = seq.getAmbigBases();
1434 bool success = 0; //guilty until proven innocent
1436 if(numNs <= maxAmbig) { success = 1; }
1437 else { success = 0; }
1441 catch(exception& e) {
1442 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1448 //***************************************************************************************************************
1450 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1453 int length = oligo.length();
1455 for(int i=0;i<length;i++){
1457 if(oligo[i] != seq[i]){
1458 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C') { success = 0; }
1459 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { success = 0; }
1460 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { success = 0; }
1461 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { success = 0; }
1462 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { success = 0; }
1463 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1464 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { success = 0; }
1465 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
1466 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1467 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1468 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { success = 0; }
1469 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
1471 if(success == 0) { break; }
1480 catch(exception& e) {
1481 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1487 //***************************************************************************************************************
1489 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1492 int length = oligo.length();
1495 for(int i=0;i<length;i++){
1497 if(oligo[i] != seq[i]){
1498 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.') { countDiffs++; }
1499 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { countDiffs++; }
1500 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { countDiffs++; }
1501 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { countDiffs++; }
1502 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { countDiffs++; }
1503 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1504 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { countDiffs++; }
1505 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
1506 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1507 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1508 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { countDiffs++; }
1509 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
1516 catch(exception& e) {
1517 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1523 //***************************************************************************************************************