5 * Created by Pat Schloss on 6/6/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
13 //**********************************************************************************************************************
14 vector<string> TrimSeqsCommand::setParameters(){
16 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
17 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
18 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
19 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
20 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
21 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
22 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
23 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
24 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
25 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
26 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
27 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
28 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
29 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
30 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
31 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
32 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
33 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
34 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
35 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
36 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
37 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
38 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
39 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
41 vector<string> myArray;
42 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
46 m->errorOut(e, "TrimSeqsCommand", "setParameters");
50 //**********************************************************************************************************************
51 string TrimSeqsCommand::getHelpString(){
53 string helpString = "";
54 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
55 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
56 helpString += "The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
57 helpString += "The fasta parameter is required.\n";
58 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
59 helpString += "The oligos parameter allows you to provide an oligos file.\n";
60 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
61 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
62 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
63 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
64 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n";
65 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
66 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
67 helpString += "The qfile parameter allows you to provide a quality file.\n";
68 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
69 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
70 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
71 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
72 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
73 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
74 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
75 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
76 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
77 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
78 helpString += "The trim.seqs command should be in the following format: \n";
79 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n";
80 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n";
81 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
82 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
83 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
87 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
93 //**********************************************************************************************************************
95 TrimSeqsCommand::TrimSeqsCommand(){
97 abort = true; calledHelp = true;
99 vector<string> tempOutNames;
100 outputTypes["fasta"] = tempOutNames;
101 outputTypes["qfile"] = tempOutNames;
102 outputTypes["group"] = tempOutNames;
104 catch(exception& e) {
105 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
109 //***************************************************************************************************************
111 TrimSeqsCommand::TrimSeqsCommand(string option) {
114 abort = false; calledHelp = false;
117 //allow user to run help
118 if(option == "help") { help(); abort = true; calledHelp = true; }
119 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
122 vector<string> myArray = setParameters();
124 OptionParser parser(option);
125 map<string,string> parameters = parser.getParameters();
127 ValidParameters validParameter;
128 map<string,string>::iterator it;
130 //check to make sure all parameters are valid for command
131 for (it = parameters.begin(); it != parameters.end(); it++) {
132 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
135 //initialize outputTypes
136 vector<string> tempOutNames;
137 outputTypes["fasta"] = tempOutNames;
138 outputTypes["qfile"] = tempOutNames;
139 outputTypes["group"] = tempOutNames;
141 //if the user changes the input directory command factory will send this info to us in the output parameter
142 string inputDir = validParameter.validFile(parameters, "inputdir", false);
143 if (inputDir == "not found"){ inputDir = ""; }
146 it = parameters.find("fasta");
147 //user has given a template file
148 if(it != parameters.end()){
149 path = m->hasPath(it->second);
150 //if the user has not given a path then, add inputdir. else leave path alone.
151 if (path == "") { parameters["fasta"] = inputDir + it->second; }
154 it = parameters.find("oligos");
155 //user has given a template file
156 if(it != parameters.end()){
157 path = m->hasPath(it->second);
158 //if the user has not given a path then, add inputdir. else leave path alone.
159 if (path == "") { parameters["oligos"] = inputDir + it->second; }
162 it = parameters.find("qfile");
163 //user has given a template file
164 if(it != parameters.end()){
165 path = m->hasPath(it->second);
166 //if the user has not given a path then, add inputdir. else leave path alone.
167 if (path == "") { parameters["qfile"] = inputDir + it->second; }
173 //check for required parameters
174 fastaFile = validParameter.validFile(parameters, "fasta", true);
175 if (fastaFile == "not found") {
176 fastaFile = m->getFastaFile();
177 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
178 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
179 }else if (fastaFile == "not open") { abort = true; }
181 //if the user changes the output directory command factory will send this info to us in the output parameter
182 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
184 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it
188 //check for optional parameter and set defaults
189 // ...at some point should added some additional type checking...
191 temp = validParameter.validFile(parameters, "flip", false);
192 if (temp == "not found"){ flip = 0; }
193 else if(m->isTrue(temp)) { flip = 1; }
195 temp = validParameter.validFile(parameters, "oligos", true);
196 if (temp == "not found"){ oligoFile = ""; }
197 else if(temp == "not open"){ abort = true; }
198 else { oligoFile = temp; }
201 temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
202 convert(temp, maxAmbig);
204 temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "0"; }
205 convert(temp, maxHomoP);
207 temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "0"; }
208 convert(temp, minLength);
210 temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; }
211 convert(temp, maxLength);
213 temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
214 convert(temp, bdiffs);
216 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
217 convert(temp, pdiffs);
219 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs; temp = toString(tempTotal); }
220 convert(temp, tdiffs);
222 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs; }
224 temp = validParameter.validFile(parameters, "qfile", true);
225 if (temp == "not found") { qFileName = ""; }
226 else if(temp == "not open") { abort = true; }
227 else { qFileName = temp; }
229 temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; }
230 convert(temp, qThreshold);
232 temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "t"; }
233 qtrim = m->isTrue(temp);
235 temp = validParameter.validFile(parameters, "rollaverage", false); if (temp == "not found") { temp = "0"; }
236 convert(temp, qRollAverage);
238 temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
239 convert(temp, qWindowAverage);
241 temp = validParameter.validFile(parameters, "qwindowsize", false); if (temp == "not found") { temp = "50"; }
242 convert(temp, qWindowSize);
244 temp = validParameter.validFile(parameters, "qstepsize", false); if (temp == "not found") { temp = "1"; }
245 convert(temp, qWindowStep);
247 temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; }
248 convert(temp, qAverage);
250 temp = validParameter.validFile(parameters, "keepfirst", false); if (temp == "not found") { temp = "0"; }
251 convert(temp, keepFirst);
253 temp = validParameter.validFile(parameters, "removelast", false); if (temp == "not found") { temp = "0"; }
254 convert(temp, removeLast);
256 temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
257 allFiles = m->isTrue(temp);
259 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
260 m->setProcessors(temp);
261 convert(temp, processors);
264 if(allFiles && (oligoFile == "")){
265 m->mothurOut("You selected allfiles, but didn't enter an oligos. Ignoring the allfiles request."); m->mothurOutEndLine();
267 if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
268 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
272 if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){
273 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
279 catch(exception& e) {
280 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
284 //***************************************************************************************************************
286 int TrimSeqsCommand::execute(){
289 if (abort == true) { if (calledHelp) { return 0; } return 2; }
291 numFPrimers = 0; //this needs to be initialized
293 vector<vector<string> > fastaFileNames;
294 vector<vector<string> > qualFileNames;
296 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
297 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
299 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
300 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
302 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
303 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
304 if (qFileName != "") {
305 outputNames.push_back(trimQualFile);
306 outputNames.push_back(scrapQualFile);
307 outputTypes["qfile"].push_back(trimQualFile);
308 outputTypes["qfile"].push_back(scrapQualFile);
311 string outputGroupFileName;
313 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
314 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
315 getOligos(fastaFileNames, qualFileNames);
318 vector<unsigned long int> fastaFilePos;
319 vector<unsigned long int> qFilePos;
321 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
323 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
324 lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
325 if (qFileName != "") { qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)])); }
327 if(qFileName == "") { qLines = lines; } //files with duds
329 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
331 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames, lines[0], qLines[0]);
333 createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames);
336 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames, lines[0], qLines[0]);
339 if (m->control_pressed) { return 0; }
342 map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
343 map<string, string>::iterator it;
344 set<string> namesToRemove;
345 for(int i=0;i<fastaFileNames.size();i++){
346 for(int j=0;j<fastaFileNames[0].size();j++){
347 if (fastaFileNames[i][j] != "") {
348 if(m->isBlank(fastaFileNames[i][j])){
349 remove(fastaFileNames[i][j].c_str());
350 namesToRemove.insert(fastaFileNames[i][j]);
353 remove(qualFileNames[i][j].c_str());
354 namesToRemove.insert(qualFileNames[i][j]);
357 it = uniqueFastaNames.find(fastaFileNames[i][j]);
358 if (it == uniqueFastaNames.end()) {
359 uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];
366 //remove names for outputFileNames, just cleans up the output
367 vector<string> outputNames2;
368 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
369 outputNames = outputNames2;
371 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
373 m->openInputFile(it->first, in);
376 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
377 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
378 m->openOutputFile(thisGroupName, out);
381 if (m->control_pressed) { break; }
383 Sequence currSeq(in); m->gobble(in);
384 out << currSeq.getName() << '\t' << it->second << endl;
391 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
393 //output group counts
394 m->mothurOutEndLine();
396 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
397 total += it->second; m->mothurOut("Group " + it->first + " contains " + toString(it->second) + " sequences."); m->mothurOutEndLine();
399 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
401 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
403 //set fasta file as new current fastafile
405 itTypes = outputTypes.find("fasta");
406 if (itTypes != outputTypes.end()) {
407 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
410 itTypes = outputTypes.find("qfile");
411 if (itTypes != outputTypes.end()) {
412 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
415 itTypes = outputTypes.find("group");
416 if (itTypes != outputTypes.end()) {
417 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
420 m->mothurOutEndLine();
421 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
422 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
423 m->mothurOutEndLine();
428 catch(exception& e) {
429 m->errorOut(e, "TrimSeqsCommand", "execute");
434 /**************************************************************************************/
436 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, linePair* line, linePair* qline) {
440 ofstream trimFASTAFile;
441 m->openOutputFile(trimFileName, trimFASTAFile);
443 ofstream scrapFASTAFile;
444 m->openOutputFile(scrapFileName, scrapFASTAFile);
446 ofstream trimQualFile;
447 ofstream scrapQualFile;
449 m->openOutputFile(trimQFileName, trimQualFile);
450 m->openOutputFile(scrapQFileName, scrapQualFile);
453 ofstream outGroupsFile;
454 if (oligoFile != ""){ m->openOutputFile(groupFileName, outGroupsFile); }
456 for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
457 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
458 if (fastaFileNames[i][j] != "") {
460 m->openOutputFile(fastaFileNames[i][j], temp); temp.close();
462 m->openOutputFile(qualFileNames[i][j], temp); temp.close();
470 m->openInputFile(filename, inFASTA);
471 inFASTA.seekg(line->start);
474 if(qFileName != "") {
475 m->openInputFile(qFileName, qFile);
476 qFile.seekg(qline->start);
484 if (m->control_pressed) {
485 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
486 if (oligoFile != "") { outGroupsFile.close(); }
491 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
497 string trashCode = "";
498 int currentSeqsDiffs = 0;
500 Sequence currSeq(inFASTA); m->gobble(inFASTA);
502 QualityScores currQual;
504 currQual = QualityScores(qFile); m->gobble(qFile);
507 string origSeq = currSeq.getUnaligned();
510 int barcodeIndex = 0;
513 if(barcodes.size() != 0){
514 success = stripBarcode(currSeq, currQual, barcodeIndex);
515 if(success > bdiffs) { trashCode += 'b'; }
516 else{ currentSeqsDiffs += success; }
519 if(numFPrimers != 0){
520 success = stripForward(currSeq, currQual, primerIndex);
521 if(success > pdiffs) { trashCode += 'f'; }
522 else{ currentSeqsDiffs += success; }
525 if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
527 if(numRPrimers != 0){
528 success = stripReverse(currSeq, currQual);
529 if(!success) { trashCode += 'r'; }
533 success = keepFirstTrim(currSeq, currQual);
537 success = removeLastTrim(currSeq, currQual);
538 if(!success) { trashCode += 'l'; }
543 int origLength = currSeq.getNumBases();
545 if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); }
546 else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); }
547 else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); }
548 else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); }
549 else { success = 1; }
551 //you don't want to trim, if it fails above then scrap it
552 if ((!qtrim) && (origLength != currSeq.getNumBases())) { success = 0; }
554 if(!success) { trashCode += 'q'; }
557 if(minLength > 0 || maxLength > 0){
558 success = cullLength(currSeq);
559 if(!success) { trashCode += 'l'; }
562 success = cullHomoP(currSeq);
563 if(!success) { trashCode += 'h'; }
566 success = cullAmbigs(currSeq);
567 if(!success) { trashCode += 'n'; }
570 if(flip){ // should go last
571 currSeq.reverseComplement();
573 currQual.flipQScores();
577 if(trashCode.length() == 0){
578 currSeq.setAligned(currSeq.getUnaligned());
579 currSeq.printSequence(trimFASTAFile);
582 currQual.printQScores(trimQualFile);
585 if(barcodes.size() != 0){
586 string thisGroup = barcodeNameVector[barcodeIndex];
587 if (primers.size() != 0) { if (primerNameVector[primerIndex] != "") { thisGroup += "." + primerNameVector[primerIndex]; } }
589 outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
591 map<string, int>::iterator it = groupCounts.find(thisGroup);
592 if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; }
593 else { groupCounts[it->first]++; }
600 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
601 currSeq.printSequence(output);
605 m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
606 currQual.printQScores(output);
612 currSeq.setName(currSeq.getName() + '|' + trashCode);
613 currSeq.setUnaligned(origSeq);
614 currSeq.setAligned(origSeq);
615 currSeq.printSequence(scrapFASTAFile);
617 currQual.printQScores(scrapQualFile);
623 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
624 unsigned long int pos = inFASTA.tellg();
625 if ((pos == -1) || (pos >= line->end)) { break; }
627 if (inFASTA.eof()) { break; }
631 if((count) % 1000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
635 if((count) % 1000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
639 trimFASTAFile.close();
640 scrapFASTAFile.close();
641 if (oligoFile != "") { outGroupsFile.close(); }
642 if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
646 catch(exception& e) {
647 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
652 /**************************************************************************************************/
654 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames) {
656 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
661 //loop through and create all the processes you want
662 while (process != processors) {
666 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
670 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
671 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
676 for(int i=0;i<tempFASTAFileNames.size();i++){
677 for(int j=0;j<tempFASTAFileNames[i].size();j++){
678 if (tempFASTAFileNames[i][j] != "") {
679 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
680 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
683 tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
684 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
691 driverCreateTrim(filename,
693 (trimFASTAFileName + toString(getpid()) + ".temp"),
694 (scrapFASTAFileName + toString(getpid()) + ".temp"),
695 (trimQualFileName + toString(getpid()) + ".temp"),
696 (scrapQualFileName + toString(getpid()) + ".temp"),
697 (groupFile + toString(getpid()) + ".temp"),
699 tempPrimerQualFileNames,
703 //pass groupCounts to parent
705 string tempFile = filename + toString(getpid()) + ".num.temp";
706 m->openOutputFile(tempFile, out);
707 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
708 out << it->first << '\t' << it->second << endl;
714 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
715 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
722 m->openOutputFile(trimFASTAFileName, temp); temp.close();
723 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
724 m->openOutputFile(trimQualFileName, temp); temp.close();
725 m->openOutputFile(scrapQualFileName, temp); temp.close();
727 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
729 //force parent to wait until all the processes are done
730 for (int i=0;i<processIDS.size();i++) {
731 int temp = processIDS[i];
736 for(int i=0;i<processIDS.size();i++){
738 m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
740 m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
741 remove((trimFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
742 m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
743 remove((scrapFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
746 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
747 remove((trimQualFileName + toString(processIDS[i]) + ".temp").c_str());
748 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
749 remove((scrapQualFileName + toString(processIDS[i]) + ".temp").c_str());
752 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
753 remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
757 for(int j=0;j<fastaFileNames.size();j++){
758 for(int k=0;k<fastaFileNames[j].size();k++){
759 if (fastaFileNames[j][k] != "") {
760 m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
761 remove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
764 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
765 remove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
773 string tempFile = filename + toString(processIDS[i]) + ".num.temp";
774 m->openInputFile(tempFile, in);
778 in >> group >> tempNum; m->gobble(in);
780 map<string, int>::iterator it = groupCounts.find(group);
781 if (it == groupCounts.end()) { groupCounts[group] = tempNum; }
782 else { groupCounts[it->first] += tempNum; }
784 in.close(); remove(tempFile.c_str());
791 catch(exception& e) {
792 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
797 /**************************************************************************************************/
799 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
802 //set file positions for fasta file
803 fastaFilePos = m->divideFile(filename, processors);
805 if (qfilename == "") { return processors; }
807 //get name of first sequence in each chunk
808 map<string, int> firstSeqNames;
809 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
811 m->openInputFile(filename, in);
812 in.seekg(fastaFilePos[i]);
815 firstSeqNames[temp.getName()] = i;
820 //seach for filePos of each first name in the qfile and save in qfileFilePos
822 m->openInputFile(qfilename, inQual);
825 while(!inQual.eof()){
826 input = m->getline(inQual);
828 if (input.length() != 0) {
829 if(input[0] == '>'){ //this is a sequence name line
830 istringstream nameStream(input);
832 string sname = ""; nameStream >> sname;
833 sname = sname.substr(1);
835 map<string, int>::iterator it = firstSeqNames.find(sname);
837 if(it != firstSeqNames.end()) { //this is the start of a new chunk
838 unsigned long int pos = inQual.tellg();
839 qfileFilePos.push_back(pos - input.length() - 1);
840 firstSeqNames.erase(it);
845 if (firstSeqNames.size() == 0) { break; }
850 if (firstSeqNames.size() != 0) {
851 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
852 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
858 //get last file position of qfile
860 unsigned long int size;
862 //get num bytes in file
863 pFile = fopen (qfilename.c_str(),"rb");
864 if (pFile==NULL) perror ("Error opening file");
866 fseek (pFile, 0, SEEK_END);
871 qfileFilePos.push_back(size);
875 catch(exception& e) {
876 m->errorOut(e, "TrimSeqsCommand", "setLines");
881 //***************************************************************************************************************
883 void TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames){
886 m->openInputFile(oligoFile, inOligos);
890 string type, oligo, group;
893 int indexBarcode = 0;
895 while(!inOligos.eof()){
900 while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
905 //make type case insensitive
906 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
910 for(int i=0;i<oligo.length();i++){
911 oligo[i] = toupper(oligo[i]);
912 if(oligo[i] == 'U') { oligo[i] = 'T'; }
915 if(type == "FORWARD"){
918 // get rest of line in case there is a primer name
919 while (!inOligos.eof()) {
920 char c = inOligos.get();
921 if (c == 10 || c == 13){ break; }
922 else if (c == 32 || c == 9){;} //space or tab
926 //check for repeat barcodes
927 map<string, int>::iterator itPrime = primers.find(oligo);
928 if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
930 primers[oligo]=indexPrimer; indexPrimer++;
931 primerNameVector.push_back(group);
933 else if(type == "REVERSE"){
934 Sequence oligoRC("reverse", oligo);
935 oligoRC.reverseComplement();
936 revPrimer.push_back(oligoRC.getUnaligned());
938 else if(type == "BARCODE"){
941 //check for repeat barcodes
942 map<string, int>::iterator itBar = barcodes.find(oligo);
943 if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
945 barcodes[oligo]=indexBarcode; indexBarcode++;
946 barcodeNameVector.push_back(group);
948 else{ m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
954 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; }
956 //add in potential combos
957 if(barcodeNameVector.size() == 0){
959 barcodeNameVector.push_back("");
962 if(primerNameVector.size() == 0){
964 primerNameVector.push_back("");
967 fastaFileNames.resize(barcodeNameVector.size());
968 for(int i=0;i<fastaFileNames.size();i++){
969 fastaFileNames[i].assign(primerNameVector.size(), "");
971 if(qFileName != ""){ qualFileNames = fastaFileNames; }
974 set<string> uniqueNames; //used to cleanup outputFileNames
975 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
976 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
978 string primerName = primerNameVector[itPrimer->second];
979 string barcodeName = barcodeNameVector[itBar->second];
981 string comboGroupName = "";
982 string fastaFileName = "";
983 string qualFileName = "";
985 if(primerName == ""){
986 comboGroupName = barcodeNameVector[itBar->second];
989 if(barcodeName == ""){
990 comboGroupName = primerNameVector[itPrimer->second];
993 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
998 fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
999 if (uniqueNames.count(fastaFileName) == 0) {
1000 outputNames.push_back(fastaFileName);
1001 outputTypes["fasta"].push_back(fastaFileName);
1002 uniqueNames.insert(fastaFileName);
1005 fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1006 m->openOutputFile(fastaFileName, temp); temp.close();
1008 if(qFileName != ""){
1009 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1010 if (uniqueNames.count(fastaFileName) == 0) {
1011 outputNames.push_back(qualFileName);
1012 outputTypes["qfile"].push_back(qualFileName);
1015 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1016 m->openOutputFile(qualFileName, temp); temp.close();
1021 numFPrimers = primers.size();
1022 numRPrimers = revPrimer.size();
1025 catch(exception& e) {
1026 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1031 //***************************************************************************************************************
1033 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
1036 string rawSequence = seq.getUnaligned();
1037 int success = bdiffs + 1; //guilty until proven innocent
1039 //can you find the barcode
1040 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1041 string oligo = it->first;
1042 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
1043 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
1047 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1049 seq.setUnaligned(rawSequence.substr(oligo.length()));
1051 if(qual.getName() != ""){
1052 qual.trimQScores(oligo.length(), -1);
1060 //if you found the barcode or if you don't want to allow for diffs
1061 if ((bdiffs == 0) || (success == 0)) { return success; }
1063 else { //try aligning and see if you can find it
1067 Alignment* alignment;
1068 if (barcodes.size() > 0) {
1069 map<string,int>::iterator it=barcodes.begin();
1071 for(it;it!=barcodes.end();it++){
1072 if(it->first.length() > maxLength){
1073 maxLength = it->first.length();
1076 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));
1078 }else{ alignment = NULL; }
1080 //can you find the barcode
1086 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1087 string oligo = it->first;
1088 // int length = oligo.length();
1090 if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
1091 success = bdiffs + 10;
1095 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1096 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1097 oligo = alignment->getSeqAAln();
1098 string temp = alignment->getSeqBAln();
1100 int alnLength = oligo.length();
1102 for(int i=oligo.length()-1;i>=0;i--){
1103 if(oligo[i] != '-'){ alnLength = i+1; break; }
1105 oligo = oligo.substr(0,alnLength);
1106 temp = temp.substr(0,alnLength);
1108 int numDiff = countDiffs(oligo, temp);
1110 if(numDiff < minDiff){
1113 minGroup = it->second;
1115 for(int i=0;i<alnLength;i++){
1121 else if(numDiff == minDiff){
1127 if(minDiff > bdiffs) { success = minDiff; } //no good matches
1128 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
1129 else{ //use the best match
1131 seq.setUnaligned(rawSequence.substr(minPos));
1133 if(qual.getName() != ""){
1134 qual.trimQScores(minPos, -1);
1139 if (alignment != NULL) { delete alignment; }
1146 catch(exception& e) {
1147 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
1153 //***************************************************************************************************************
1155 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
1157 string rawSequence = seq.getUnaligned();
1158 int success = pdiffs + 1; //guilty until proven innocent
1160 //can you find the primer
1161 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1162 string oligo = it->first;
1163 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
1164 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1168 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1170 seq.setUnaligned(rawSequence.substr(oligo.length()));
1171 if(qual.getName() != ""){
1172 qual.trimQScores(oligo.length(), -1);
1179 //if you found the barcode or if you don't want to allow for diffs
1180 if ((pdiffs == 0) || (success == 0)) { return success; }
1182 else { //try aligning and see if you can find it
1186 Alignment* alignment;
1187 if (primers.size() > 0) {
1188 map<string,int>::iterator it=primers.begin();
1190 for(it;it!=primers.end();it++){
1191 if(it->first.length() > maxLength){
1192 maxLength = it->first.length();
1195 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));
1197 }else{ alignment = NULL; }
1199 //can you find the barcode
1205 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1206 string oligo = it->first;
1207 // int length = oligo.length();
1209 if(rawSequence.length() < maxLength){
1210 success = pdiffs + 100;
1214 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1215 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1216 oligo = alignment->getSeqAAln();
1217 string temp = alignment->getSeqBAln();
1219 int alnLength = oligo.length();
1221 for(int i=oligo.length()-1;i>=0;i--){
1222 if(oligo[i] != '-'){ alnLength = i+1; break; }
1224 oligo = oligo.substr(0,alnLength);
1225 temp = temp.substr(0,alnLength);
1227 int numDiff = countDiffs(oligo, temp);
1229 if(numDiff < minDiff){
1232 minGroup = it->second;
1234 for(int i=0;i<alnLength;i++){
1240 else if(numDiff == minDiff){
1246 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1247 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
1248 else{ //use the best match
1250 seq.setUnaligned(rawSequence.substr(minPos));
1251 if(qual.getName() != ""){
1252 qual.trimQScores(minPos, -1);
1257 if (alignment != NULL) { delete alignment; }
1264 catch(exception& e) {
1265 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1270 //***************************************************************************************************************
1272 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1274 string rawSequence = seq.getUnaligned();
1275 bool success = 0; //guilty until proven innocent
1277 for(int i=0;i<numRPrimers;i++){
1278 string oligo = revPrimer[i];
1280 if(rawSequence.length() < oligo.length()){
1285 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1286 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1287 if(qual.getName() != ""){
1288 qual.trimQScores(-1, rawSequence.length()-oligo.length());
1297 catch(exception& e) {
1298 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1303 //***************************************************************************************************************
1305 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1308 if(qscores.getName() != ""){
1309 qscores.trimQScores(-1, keepFirst);
1311 sequence.trim(keepFirst);
1314 catch(exception& e) {
1315 m->errorOut(e, "keepFirstTrim", "countDiffs");
1321 //***************************************************************************************************************
1323 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1327 int length = sequence.getNumBases() - removeLast;
1330 if(qscores.getName() != ""){
1331 qscores.trimQScores(-1, length);
1333 sequence.trim(length);
1342 catch(exception& e) {
1343 m->errorOut(e, "removeLastTrim", "countDiffs");
1349 //***************************************************************************************************************
1351 bool TrimSeqsCommand::cullLength(Sequence& seq){
1354 int length = seq.getNumBases();
1355 bool success = 0; //guilty until proven innocent
1357 if(length >= minLength && maxLength == 0) { success = 1; }
1358 else if(length >= minLength && length <= maxLength) { success = 1; }
1359 else { success = 0; }
1364 catch(exception& e) {
1365 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1371 //***************************************************************************************************************
1373 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1375 int longHomoP = seq.getLongHomoPolymer();
1376 bool success = 0; //guilty until proven innocent
1378 if(longHomoP <= maxHomoP){ success = 1; }
1379 else { success = 0; }
1383 catch(exception& e) {
1384 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1390 //***************************************************************************************************************
1392 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1394 int numNs = seq.getAmbigBases();
1395 bool success = 0; //guilty until proven innocent
1397 if(numNs <= maxAmbig) { success = 1; }
1398 else { success = 0; }
1402 catch(exception& e) {
1403 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1409 //***************************************************************************************************************
1411 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1414 int length = oligo.length();
1416 for(int i=0;i<length;i++){
1418 if(oligo[i] != seq[i]){
1419 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C') { success = 0; }
1420 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { success = 0; }
1421 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { success = 0; }
1422 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { success = 0; }
1423 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { success = 0; }
1424 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1425 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { success = 0; }
1426 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
1427 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1428 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1429 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { success = 0; }
1430 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
1432 if(success == 0) { break; }
1441 catch(exception& e) {
1442 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1448 //***************************************************************************************************************
1450 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1453 int length = oligo.length();
1456 for(int i=0;i<length;i++){
1458 if(oligo[i] != seq[i]){
1459 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.') { countDiffs++; }
1460 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { countDiffs++; }
1461 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { countDiffs++; }
1462 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { countDiffs++; }
1463 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { countDiffs++; }
1464 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1465 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { countDiffs++; }
1466 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
1467 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1468 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1469 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { countDiffs++; }
1470 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
1477 catch(exception& e) {
1478 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1484 //***************************************************************************************************************