5 * Created by Pat Schloss on 6/6/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
13 //**********************************************************************************************************************
14 vector<string> TrimSeqsCommand::setParameters(){
16 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
17 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
18 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
19 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
20 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
21 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
22 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
23 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
24 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
25 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
26 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
27 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
28 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
29 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
30 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
31 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
32 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
33 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
34 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
35 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
36 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
37 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
38 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
39 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
41 vector<string> myArray;
42 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
46 m->errorOut(e, "TrimSeqsCommand", "setParameters");
50 //**********************************************************************************************************************
51 string TrimSeqsCommand::getHelpString(){
53 string helpString = "";
54 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
55 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
56 helpString += "The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
57 helpString += "The fasta parameter is required.\n";
58 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
59 helpString += "The oligos parameter allows you to provide an oligos file.\n";
60 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
61 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
62 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
63 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
64 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n";
65 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
66 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
67 helpString += "The qfile parameter allows you to provide a quality file.\n";
68 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
69 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
70 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
71 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
72 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
73 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
74 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
75 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
76 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
77 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
78 helpString += "The trim.seqs command should be in the following format: \n";
79 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n";
80 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n";
81 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
82 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
83 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
87 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
93 //**********************************************************************************************************************
95 TrimSeqsCommand::TrimSeqsCommand(){
97 abort = true; calledHelp = true;
99 vector<string> tempOutNames;
100 outputTypes["fasta"] = tempOutNames;
101 outputTypes["qfile"] = tempOutNames;
102 outputTypes["group"] = tempOutNames;
104 catch(exception& e) {
105 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
109 //***************************************************************************************************************
111 TrimSeqsCommand::TrimSeqsCommand(string option) {
114 abort = false; calledHelp = false;
117 //allow user to run help
118 if(option == "help") { help(); abort = true; calledHelp = true; }
119 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
122 vector<string> myArray = setParameters();
124 OptionParser parser(option);
125 map<string,string> parameters = parser.getParameters();
127 ValidParameters validParameter;
128 map<string,string>::iterator it;
130 //check to make sure all parameters are valid for command
131 for (it = parameters.begin(); it != parameters.end(); it++) {
132 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
135 //initialize outputTypes
136 vector<string> tempOutNames;
137 outputTypes["fasta"] = tempOutNames;
138 outputTypes["qfile"] = tempOutNames;
139 outputTypes["group"] = tempOutNames;
141 //if the user changes the input directory command factory will send this info to us in the output parameter
142 string inputDir = validParameter.validFile(parameters, "inputdir", false);
143 if (inputDir == "not found"){ inputDir = ""; }
146 it = parameters.find("fasta");
147 //user has given a template file
148 if(it != parameters.end()){
149 path = m->hasPath(it->second);
150 //if the user has not given a path then, add inputdir. else leave path alone.
151 if (path == "") { parameters["fasta"] = inputDir + it->second; }
154 it = parameters.find("oligos");
155 //user has given a template file
156 if(it != parameters.end()){
157 path = m->hasPath(it->second);
158 //if the user has not given a path then, add inputdir. else leave path alone.
159 if (path == "") { parameters["oligos"] = inputDir + it->second; }
162 it = parameters.find("qfile");
163 //user has given a template file
164 if(it != parameters.end()){
165 path = m->hasPath(it->second);
166 //if the user has not given a path then, add inputdir. else leave path alone.
167 if (path == "") { parameters["qfile"] = inputDir + it->second; }
173 //check for required parameters
174 fastaFile = validParameter.validFile(parameters, "fasta", true);
175 if (fastaFile == "not found") {
176 fastaFile = m->getFastaFile();
177 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
178 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
179 }else if (fastaFile == "not open") { abort = true; }
181 //if the user changes the output directory command factory will send this info to us in the output parameter
182 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
184 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it
188 //check for optional parameter and set defaults
189 // ...at some point should added some additional type checking...
191 temp = validParameter.validFile(parameters, "flip", false);
192 if (temp == "not found"){ flip = 0; }
193 else if(m->isTrue(temp)) { flip = 1; }
195 temp = validParameter.validFile(parameters, "oligos", true);
196 if (temp == "not found"){ oligoFile = ""; }
197 else if(temp == "not open"){ abort = true; }
198 else { oligoFile = temp; }
201 temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
202 convert(temp, maxAmbig);
204 temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "0"; }
205 convert(temp, maxHomoP);
207 temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "0"; }
208 convert(temp, minLength);
210 temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; }
211 convert(temp, maxLength);
213 temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
214 convert(temp, bdiffs);
216 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
217 convert(temp, pdiffs);
219 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs; temp = toString(tempTotal); }
220 convert(temp, tdiffs);
222 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs; }
224 temp = validParameter.validFile(parameters, "qfile", true);
225 if (temp == "not found") { qFileName = ""; }
226 else if(temp == "not open") { abort = true; }
227 else { qFileName = temp; }
229 temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; }
230 convert(temp, qThreshold);
232 temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "t"; }
233 qtrim = m->isTrue(temp);
235 temp = validParameter.validFile(parameters, "rollaverage", false); if (temp == "not found") { temp = "0"; }
236 convert(temp, qRollAverage);
238 temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
239 convert(temp, qWindowAverage);
241 temp = validParameter.validFile(parameters, "qwindowsize", false); if (temp == "not found") { temp = "50"; }
242 convert(temp, qWindowSize);
244 temp = validParameter.validFile(parameters, "qstepsize", false); if (temp == "not found") { temp = "1"; }
245 convert(temp, qWindowStep);
247 temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; }
248 convert(temp, qAverage);
250 temp = validParameter.validFile(parameters, "keepfirst", false); if (temp == "not found") { temp = "0"; }
251 convert(temp, keepFirst);
253 temp = validParameter.validFile(parameters, "removelast", false); if (temp == "not found") { temp = "0"; }
254 convert(temp, removeLast);
256 temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
257 allFiles = m->isTrue(temp);
259 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
260 m->setProcessors(temp);
261 convert(temp, processors);
264 if(allFiles && (oligoFile == "")){
265 m->mothurOut("You selected allfiles, but didn't enter an oligos. Ignoring the allfiles request."); m->mothurOutEndLine();
267 if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
268 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
272 if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){
273 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
279 catch(exception& e) {
280 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
284 //***************************************************************************************************************
286 int TrimSeqsCommand::execute(){
289 if (abort == true) { if (calledHelp) { return 0; } return 2; }
291 numFPrimers = 0; //this needs to be initialized
293 vector<vector<string> > fastaFileNames;
294 vector<vector<string> > qualFileNames;
296 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
297 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
299 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
300 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
302 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
303 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
304 if (qFileName != "") {
305 outputNames.push_back(trimQualFile);
306 outputNames.push_back(scrapQualFile);
307 outputTypes["qfile"].push_back(trimQualFile);
308 outputTypes["qfile"].push_back(scrapQualFile);
311 string outputGroupFileName;
313 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
314 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
315 getOligos(fastaFileNames, qualFileNames);
318 vector<unsigned long int> fastaFilePos;
319 vector<unsigned long int> qFilePos;
321 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
323 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
324 lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
325 if (qFileName != "") { qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)])); }
327 if(qFileName == "") { qLines = lines; } //files with duds
329 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
331 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames, lines[0], qLines[0]);
333 createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames);
336 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames, lines[0], qLines[0]);
339 if (m->control_pressed) { return 0; }
342 map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
343 map<string, string>::iterator it;
344 set<string> namesToRemove;
345 for(int i=0;i<fastaFileNames.size();i++){
346 for(int j=0;j<fastaFileNames[0].size();j++){
347 if (fastaFileNames[i][j] != "") {
348 if(m->isBlank(fastaFileNames[i][j])){
349 remove(fastaFileNames[i][j].c_str());
350 namesToRemove.insert(fastaFileNames[i][j]);
353 remove(qualFileNames[i][j].c_str());
354 namesToRemove.insert(qualFileNames[i][j]);
357 it = uniqueFastaNames.find(fastaFileNames[i][j]);
358 if (it == uniqueFastaNames.end()) {
359 uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];
366 //remove names for outputFileNames, just cleans up the output
367 vector<string> outputNames2;
368 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
369 outputNames = outputNames2;
371 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
373 m->openInputFile(it->first, in);
376 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
377 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
378 m->openOutputFile(thisGroupName, out);
381 if (m->control_pressed) { break; }
383 Sequence currSeq(in); m->gobble(in);
384 out << currSeq.getName() << '\t' << it->second << endl;
391 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
393 //output group counts
394 m->mothurOutEndLine();
396 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
397 total += it->second; m->mothurOut("Group " + it->first + " contains " + toString(it->second) + " sequences."); m->mothurOutEndLine();
399 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
401 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
403 //set fasta file as new current fastafile
405 itTypes = outputTypes.find("fasta");
406 if (itTypes != outputTypes.end()) {
407 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
410 itTypes = outputTypes.find("qfile");
411 if (itTypes != outputTypes.end()) {
412 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
415 itTypes = outputTypes.find("group");
416 if (itTypes != outputTypes.end()) {
417 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
420 m->mothurOutEndLine();
421 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
422 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
423 m->mothurOutEndLine();
428 catch(exception& e) {
429 m->errorOut(e, "TrimSeqsCommand", "execute");
434 /**************************************************************************************/
436 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, linePair* line, linePair* qline) {
440 ofstream trimFASTAFile;
441 m->openOutputFile(trimFileName, trimFASTAFile);
443 ofstream scrapFASTAFile;
444 m->openOutputFile(scrapFileName, scrapFASTAFile);
446 ofstream trimQualFile;
447 ofstream scrapQualFile;
449 m->openOutputFile(trimQFileName, trimQualFile);
450 m->openOutputFile(scrapQFileName, scrapQualFile);
453 ofstream outGroupsFile;
454 if (oligoFile != ""){ m->openOutputFile(groupFileName, outGroupsFile); }
456 for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
457 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
458 if (fastaFileNames[i][j] != "") {
460 m->openOutputFile(fastaFileNames[i][j], temp); temp.close();
462 m->openOutputFile(qualFileNames[i][j], temp); temp.close();
470 m->openInputFile(filename, inFASTA);
471 inFASTA.seekg(line->start);
474 if(qFileName != "") {
475 m->openInputFile(qFileName, qFile);
476 qFile.seekg(qline->start);
484 if (m->control_pressed) {
485 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
486 if (oligoFile != "") { outGroupsFile.close(); }
491 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
497 string trashCode = "";
498 int currentSeqsDiffs = 0;
500 Sequence currSeq(inFASTA); m->gobble(inFASTA);
502 QualityScores currQual;
504 currQual = QualityScores(qFile); m->gobble(qFile);
507 string origSeq = currSeq.getUnaligned();
510 int barcodeIndex = 0;
513 if(barcodes.size() != 0){
514 success = stripBarcode(currSeq, currQual, barcodeIndex);
515 if(success > bdiffs) { trashCode += 'b'; }
516 else{ currentSeqsDiffs += success; }
519 if(numFPrimers != 0){
520 success = stripForward(currSeq, currQual, primerIndex);
521 if(success > pdiffs) { trashCode += 'f'; }
522 else{ currentSeqsDiffs += success; }
525 if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
527 if(numRPrimers != 0){
528 success = stripReverse(currSeq, currQual);
529 if(!success) { trashCode += 'r'; }
533 success = keepFirstTrim(currSeq, currQual);
537 success = removeLastTrim(currSeq, currQual);
538 if(!success) { trashCode += 'l'; }
543 int origLength = currSeq.getNumBases();
545 if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); }
546 else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); }
547 else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); }
548 else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); }
549 else { success = 1; }
551 //you don't want to trim, if it fails above then scrap it
552 if ((!qtrim) && (origLength != currSeq.getNumBases())) { success = 0; }
554 if(!success) { trashCode += 'q'; }
557 if(minLength > 0 || maxLength > 0){
558 success = cullLength(currSeq);
559 if(!success) { trashCode += 'l'; }
562 success = cullHomoP(currSeq);
563 if(!success) { trashCode += 'h'; }
566 success = cullAmbigs(currSeq);
567 if(!success) { trashCode += 'n'; }
570 if(flip){ // should go last
571 currSeq.reverseComplement();
573 currQual.flipQScores();
577 if(trashCode.length() == 0){
578 currSeq.setAligned(currSeq.getUnaligned());
579 currSeq.printSequence(trimFASTAFile);
582 currQual.printQScores(trimQualFile);
585 if(barcodes.size() != 0){
586 string thisGroup = barcodeNameVector[barcodeIndex];
587 if (primers.size() != 0) { if (primerNameVector[primerIndex] != "") { thisGroup += "." + primerNameVector[primerIndex]; } }
589 outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
591 map<string, int>::iterator it = groupCounts.find(thisGroup);
592 if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; }
593 else { groupCounts[it->first]++; }
600 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
601 currSeq.printSequence(output);
605 m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
606 currQual.printQScores(output);
612 currSeq.setName(currSeq.getName() + '|' + trashCode);
613 currSeq.setUnaligned(origSeq);
614 currSeq.setAligned(origSeq);
615 currSeq.printSequence(scrapFASTAFile);
617 currQual.printQScores(scrapQualFile);
623 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
624 unsigned long int pos = inFASTA.tellg();
625 if ((pos == -1) || (pos >= line->end)) { break; }
627 if (inFASTA.eof()) { break; }
631 if((count) % 1000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
635 if((count) % 1000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
639 trimFASTAFile.close();
640 scrapFASTAFile.close();
641 if (oligoFile != "") { outGroupsFile.close(); }
642 if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
646 catch(exception& e) {
647 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
652 /**************************************************************************************************/
654 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames) {
656 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
661 //loop through and create all the processes you want
662 while (process != processors) {
666 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
670 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
671 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
676 for(int i=0;i<tempFASTAFileNames.size();i++){
677 for(int j=0;j<tempFASTAFileNames[i].size();j++){
678 if (tempFASTAFileNames[i][j] != "") {
679 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
680 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
683 tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
684 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
691 driverCreateTrim(filename,
693 (trimFASTAFileName + toString(getpid()) + ".temp"),
694 (scrapFASTAFileName + toString(getpid()) + ".temp"),
695 (trimQualFileName + toString(getpid()) + ".temp"),
696 (scrapQualFileName + toString(getpid()) + ".temp"),
697 (groupFile + toString(getpid()) + ".temp"),
699 tempPrimerQualFileNames,
703 //pass groupCounts to parent
705 string tempFile = filename + toString(getpid()) + ".num.temp";
706 m->openOutputFile(tempFile, out);
707 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
708 out << it->first << '\t' << it->second << endl;
714 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
715 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
722 m->openOutputFile(trimFASTAFileName, temp); temp.close();
723 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
724 m->openOutputFile(trimQualFileName, temp); temp.close();
725 m->openOutputFile(scrapQualFileName, temp); temp.close();
727 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
729 //force parent to wait until all the processes are done
730 for (int i=0;i<processIDS.size();i++) {
731 int temp = processIDS[i];
736 for(int i=0;i<processIDS.size();i++){
738 m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
740 m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
741 remove((trimFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
742 m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
743 remove((scrapFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
746 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
747 remove((trimQualFileName + toString(processIDS[i]) + ".temp").c_str());
748 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
749 remove((scrapQualFileName + toString(processIDS[i]) + ".temp").c_str());
752 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
753 remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
757 for(int j=0;j<fastaFileNames.size();j++){
758 for(int k=0;k<fastaFileNames[j].size();k++){
759 if (fastaFileNames[j][k] != "") {
760 m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
761 remove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
764 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
765 remove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
773 string tempFile = filename + toString(processIDS[i]) + ".num.temp";
774 m->openInputFile(tempFile, in);
778 in >> group >> tempNum; m->gobble(in);
780 map<string, int>::iterator it = groupCounts.find(group);
781 if (it == groupCounts.end()) { groupCounts[group] = tempNum; }
782 else { groupCounts[it->first] += tempNum; }
784 in.close(); remove(tempFile.c_str());
791 catch(exception& e) {
792 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
797 /**************************************************************************************************/
799 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
801 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
802 //set file positions for fasta file
803 fastaFilePos = m->divideFile(filename, processors);
805 if (qfilename == "") { return processors; }
807 //get name of first sequence in each chunk
808 map<string, int> firstSeqNames;
809 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
811 m->openInputFile(filename, in);
812 in.seekg(fastaFilePos[i]);
815 firstSeqNames[temp.getName()] = i;
820 //seach for filePos of each first name in the qfile and save in qfileFilePos
822 m->openInputFile(qfilename, inQual);
825 while(!inQual.eof()){
826 input = m->getline(inQual);
828 if (input.length() != 0) {
829 if(input[0] == '>'){ //this is a sequence name line
830 istringstream nameStream(input);
832 string sname = ""; nameStream >> sname;
833 sname = sname.substr(1);
835 map<string, int>::iterator it = firstSeqNames.find(sname);
837 if(it != firstSeqNames.end()) { //this is the start of a new chunk
838 unsigned long int pos = inQual.tellg();
839 qfileFilePos.push_back(pos - input.length() - 1);
840 firstSeqNames.erase(it);
845 if (firstSeqNames.size() == 0) { break; }
850 if (firstSeqNames.size() != 0) {
851 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
852 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
858 //get last file position of qfile
860 unsigned long int size;
862 //get num bytes in file
863 pFile = fopen (qfilename.c_str(),"rb");
864 if (pFile==NULL) perror ("Error opening file");
866 fseek (pFile, 0, SEEK_END);
871 qfileFilePos.push_back(size);
877 fastaFilePos.push_back(0); qfileFilePos.push_back(0);
878 //get last file position of fastafile
880 unsigned long int size;
882 //get num bytes in file
883 pFile = fopen (filename.c_str(),"rb");
884 if (pFile==NULL) perror ("Error opening file");
886 fseek (pFile, 0, SEEK_END);
890 fastaFilePos.push_back(size);
892 //get last file position of fastafile
895 //get num bytes in file
896 qFile = fopen (qfilename.c_str(),"rb");
897 if (qFile==NULL) perror ("Error opening file");
899 fseek (qFile, 0, SEEK_END);
903 qfileFilePos.push_back(size);
909 catch(exception& e) {
910 m->errorOut(e, "TrimSeqsCommand", "setLines");
915 //***************************************************************************************************************
917 void TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames){
920 m->openInputFile(oligoFile, inOligos);
924 string type, oligo, group;
927 int indexBarcode = 0;
929 while(!inOligos.eof()){
934 while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
939 //make type case insensitive
940 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
944 for(int i=0;i<oligo.length();i++){
945 oligo[i] = toupper(oligo[i]);
946 if(oligo[i] == 'U') { oligo[i] = 'T'; }
949 if(type == "FORWARD"){
952 // get rest of line in case there is a primer name
953 while (!inOligos.eof()) {
954 char c = inOligos.get();
955 if (c == 10 || c == 13){ break; }
956 else if (c == 32 || c == 9){;} //space or tab
960 //check for repeat barcodes
961 map<string, int>::iterator itPrime = primers.find(oligo);
962 if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
964 primers[oligo]=indexPrimer; indexPrimer++;
965 primerNameVector.push_back(group);
967 else if(type == "REVERSE"){
968 Sequence oligoRC("reverse", oligo);
969 oligoRC.reverseComplement();
970 revPrimer.push_back(oligoRC.getUnaligned());
972 else if(type == "BARCODE"){
975 //check for repeat barcodes
976 map<string, int>::iterator itBar = barcodes.find(oligo);
977 if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
979 barcodes[oligo]=indexBarcode; indexBarcode++;
980 barcodeNameVector.push_back(group);
982 else{ m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
988 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; }
990 //add in potential combos
991 if(barcodeNameVector.size() == 0){
993 barcodeNameVector.push_back("");
996 if(primerNameVector.size() == 0){
998 primerNameVector.push_back("");
1001 fastaFileNames.resize(barcodeNameVector.size());
1002 for(int i=0;i<fastaFileNames.size();i++){
1003 fastaFileNames[i].assign(primerNameVector.size(), "");
1005 if(qFileName != ""){ qualFileNames = fastaFileNames; }
1008 set<string> uniqueNames; //used to cleanup outputFileNames
1009 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1010 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1012 string primerName = primerNameVector[itPrimer->second];
1013 string barcodeName = barcodeNameVector[itBar->second];
1015 string comboGroupName = "";
1016 string fastaFileName = "";
1017 string qualFileName = "";
1019 if(primerName == ""){
1020 comboGroupName = barcodeNameVector[itBar->second];
1023 if(barcodeName == ""){
1024 comboGroupName = primerNameVector[itPrimer->second];
1027 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1032 fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1033 if (uniqueNames.count(fastaFileName) == 0) {
1034 outputNames.push_back(fastaFileName);
1035 outputTypes["fasta"].push_back(fastaFileName);
1036 uniqueNames.insert(fastaFileName);
1039 fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1040 m->openOutputFile(fastaFileName, temp); temp.close();
1042 if(qFileName != ""){
1043 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1044 if (uniqueNames.count(fastaFileName) == 0) {
1045 outputNames.push_back(qualFileName);
1046 outputTypes["qfile"].push_back(qualFileName);
1049 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1050 m->openOutputFile(qualFileName, temp); temp.close();
1055 numFPrimers = primers.size();
1056 numRPrimers = revPrimer.size();
1059 catch(exception& e) {
1060 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1065 //***************************************************************************************************************
1067 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
1070 string rawSequence = seq.getUnaligned();
1071 int success = bdiffs + 1; //guilty until proven innocent
1073 //can you find the barcode
1074 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1075 string oligo = it->first;
1076 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
1077 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
1081 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1083 seq.setUnaligned(rawSequence.substr(oligo.length()));
1085 if(qual.getName() != ""){
1086 qual.trimQScores(oligo.length(), -1);
1094 //if you found the barcode or if you don't want to allow for diffs
1095 if ((bdiffs == 0) || (success == 0)) { return success; }
1097 else { //try aligning and see if you can find it
1101 Alignment* alignment;
1102 if (barcodes.size() > 0) {
1103 map<string,int>::iterator it=barcodes.begin();
1105 for(it;it!=barcodes.end();it++){
1106 if(it->first.length() > maxLength){
1107 maxLength = it->first.length();
1110 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));
1112 }else{ alignment = NULL; }
1114 //can you find the barcode
1120 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1121 string oligo = it->first;
1122 // int length = oligo.length();
1124 if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
1125 success = bdiffs + 10;
1129 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1130 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1131 oligo = alignment->getSeqAAln();
1132 string temp = alignment->getSeqBAln();
1134 int alnLength = oligo.length();
1136 for(int i=oligo.length()-1;i>=0;i--){
1137 if(oligo[i] != '-'){ alnLength = i+1; break; }
1139 oligo = oligo.substr(0,alnLength);
1140 temp = temp.substr(0,alnLength);
1142 int numDiff = countDiffs(oligo, temp);
1144 if(numDiff < minDiff){
1147 minGroup = it->second;
1149 for(int i=0;i<alnLength;i++){
1155 else if(numDiff == minDiff){
1161 if(minDiff > bdiffs) { success = minDiff; } //no good matches
1162 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
1163 else{ //use the best match
1165 seq.setUnaligned(rawSequence.substr(minPos));
1167 if(qual.getName() != ""){
1168 qual.trimQScores(minPos, -1);
1173 if (alignment != NULL) { delete alignment; }
1180 catch(exception& e) {
1181 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
1187 //***************************************************************************************************************
1189 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
1191 string rawSequence = seq.getUnaligned();
1192 int success = pdiffs + 1; //guilty until proven innocent
1194 //can you find the primer
1195 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1196 string oligo = it->first;
1197 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
1198 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1202 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1204 seq.setUnaligned(rawSequence.substr(oligo.length()));
1205 if(qual.getName() != ""){
1206 qual.trimQScores(oligo.length(), -1);
1213 //if you found the barcode or if you don't want to allow for diffs
1214 if ((pdiffs == 0) || (success == 0)) { return success; }
1216 else { //try aligning and see if you can find it
1220 Alignment* alignment;
1221 if (primers.size() > 0) {
1222 map<string,int>::iterator it=primers.begin();
1224 for(it;it!=primers.end();it++){
1225 if(it->first.length() > maxLength){
1226 maxLength = it->first.length();
1229 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));
1231 }else{ alignment = NULL; }
1233 //can you find the barcode
1239 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1240 string oligo = it->first;
1241 // int length = oligo.length();
1243 if(rawSequence.length() < maxLength){
1244 success = pdiffs + 100;
1248 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1249 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1250 oligo = alignment->getSeqAAln();
1251 string temp = alignment->getSeqBAln();
1253 int alnLength = oligo.length();
1255 for(int i=oligo.length()-1;i>=0;i--){
1256 if(oligo[i] != '-'){ alnLength = i+1; break; }
1258 oligo = oligo.substr(0,alnLength);
1259 temp = temp.substr(0,alnLength);
1261 int numDiff = countDiffs(oligo, temp);
1263 if(numDiff < minDiff){
1266 minGroup = it->second;
1268 for(int i=0;i<alnLength;i++){
1274 else if(numDiff == minDiff){
1280 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1281 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
1282 else{ //use the best match
1284 seq.setUnaligned(rawSequence.substr(minPos));
1285 if(qual.getName() != ""){
1286 qual.trimQScores(minPos, -1);
1291 if (alignment != NULL) { delete alignment; }
1298 catch(exception& e) {
1299 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1304 //***************************************************************************************************************
1306 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1308 string rawSequence = seq.getUnaligned();
1309 bool success = 0; //guilty until proven innocent
1311 for(int i=0;i<numRPrimers;i++){
1312 string oligo = revPrimer[i];
1314 if(rawSequence.length() < oligo.length()){
1319 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1320 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1321 if(qual.getName() != ""){
1322 qual.trimQScores(-1, rawSequence.length()-oligo.length());
1331 catch(exception& e) {
1332 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1337 //***************************************************************************************************************
1339 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1342 if(qscores.getName() != ""){
1343 qscores.trimQScores(-1, keepFirst);
1345 sequence.trim(keepFirst);
1348 catch(exception& e) {
1349 m->errorOut(e, "keepFirstTrim", "countDiffs");
1355 //***************************************************************************************************************
1357 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1361 int length = sequence.getNumBases() - removeLast;
1364 if(qscores.getName() != ""){
1365 qscores.trimQScores(-1, length);
1367 sequence.trim(length);
1376 catch(exception& e) {
1377 m->errorOut(e, "removeLastTrim", "countDiffs");
1383 //***************************************************************************************************************
1385 bool TrimSeqsCommand::cullLength(Sequence& seq){
1388 int length = seq.getNumBases();
1389 bool success = 0; //guilty until proven innocent
1391 if(length >= minLength && maxLength == 0) { success = 1; }
1392 else if(length >= minLength && length <= maxLength) { success = 1; }
1393 else { success = 0; }
1398 catch(exception& e) {
1399 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1405 //***************************************************************************************************************
1407 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1409 int longHomoP = seq.getLongHomoPolymer();
1410 bool success = 0; //guilty until proven innocent
1412 if(longHomoP <= maxHomoP){ success = 1; }
1413 else { success = 0; }
1417 catch(exception& e) {
1418 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1424 //***************************************************************************************************************
1426 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1428 int numNs = seq.getAmbigBases();
1429 bool success = 0; //guilty until proven innocent
1431 if(numNs <= maxAmbig) { success = 1; }
1432 else { success = 0; }
1436 catch(exception& e) {
1437 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1443 //***************************************************************************************************************
1445 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1448 int length = oligo.length();
1450 for(int i=0;i<length;i++){
1452 if(oligo[i] != seq[i]){
1453 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C') { success = 0; }
1454 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { success = 0; }
1455 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { success = 0; }
1456 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { success = 0; }
1457 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { success = 0; }
1458 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1459 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { success = 0; }
1460 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
1461 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1462 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1463 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { success = 0; }
1464 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
1466 if(success == 0) { break; }
1475 catch(exception& e) {
1476 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1482 //***************************************************************************************************************
1484 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1487 int length = oligo.length();
1490 for(int i=0;i<length;i++){
1492 if(oligo[i] != seq[i]){
1493 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.') { countDiffs++; }
1494 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { countDiffs++; }
1495 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { countDiffs++; }
1496 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { countDiffs++; }
1497 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { countDiffs++; }
1498 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1499 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { countDiffs++; }
1500 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
1501 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1502 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1503 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { countDiffs++; }
1504 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
1511 catch(exception& e) {
1512 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1518 //***************************************************************************************************************