5 * Created by Pat Schloss on 6/6/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
13 //**********************************************************************************************************************
15 vector<string> TrimSeqsCommand::getValidParameters(){
17 string Array[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop","minlength", "maxlength", "qfile",
18 "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage",
19 "keepfirst", "removelast",
20 "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
21 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
25 m->errorOut(e, "TrimSeqsCommand", "getValidParameters");
30 //**********************************************************************************************************************
32 TrimSeqsCommand::TrimSeqsCommand(){
34 abort = true; calledHelp = true;
35 vector<string> tempOutNames;
36 outputTypes["fasta"] = tempOutNames;
37 outputTypes["qfile"] = tempOutNames;
38 outputTypes["group"] = tempOutNames;
41 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
46 //**********************************************************************************************************************
48 vector<string> TrimSeqsCommand::getRequiredParameters(){
50 string Array[] = {"fasta"};
51 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
55 m->errorOut(e, "TrimSeqsCommand", "getRequiredParameters");
60 //**********************************************************************************************************************
62 vector<string> TrimSeqsCommand::getRequiredFiles(){
64 vector<string> myArray;
68 m->errorOut(e, "TrimSeqsCommand", "getRequiredFiles");
73 //***************************************************************************************************************
75 TrimSeqsCommand::TrimSeqsCommand(string option) {
78 abort = false; calledHelp = false;
81 //allow user to run help
82 if(option == "help") { help(); abort = true; calledHelp = true; }
85 //valid paramters for this command
86 string AlignArray[] = { "fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile",
87 "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage",
88 "keepfirst", "removelast",
89 "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
91 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
93 OptionParser parser(option);
94 map<string,string> parameters = parser.getParameters();
96 ValidParameters validParameter;
97 map<string,string>::iterator it;
99 //check to make sure all parameters are valid for command
100 for (it = parameters.begin(); it != parameters.end(); it++) {
101 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
104 //initialize outputTypes
105 vector<string> tempOutNames;
106 outputTypes["fasta"] = tempOutNames;
107 outputTypes["qfile"] = tempOutNames;
108 outputTypes["group"] = tempOutNames;
110 //if the user changes the input directory command factory will send this info to us in the output parameter
111 string inputDir = validParameter.validFile(parameters, "inputdir", false);
112 if (inputDir == "not found"){ inputDir = ""; }
115 it = parameters.find("fasta");
116 //user has given a template file
117 if(it != parameters.end()){
118 path = m->hasPath(it->second);
119 //if the user has not given a path then, add inputdir. else leave path alone.
120 if (path == "") { parameters["fasta"] = inputDir + it->second; }
123 it = parameters.find("oligos");
124 //user has given a template file
125 if(it != parameters.end()){
126 path = m->hasPath(it->second);
127 //if the user has not given a path then, add inputdir. else leave path alone.
128 if (path == "") { parameters["oligos"] = inputDir + it->second; }
131 it = parameters.find("qfile");
132 //user has given a template file
133 if(it != parameters.end()){
134 path = m->hasPath(it->second);
135 //if the user has not given a path then, add inputdir. else leave path alone.
136 if (path == "") { parameters["qfile"] = inputDir + it->second; }
142 //check for required parameters
143 fastaFile = validParameter.validFile(parameters, "fasta", true);
144 if (fastaFile == "not found") { m->mothurOut("fasta is a required parameter for the trim.seqs command."); m->mothurOutEndLine(); abort = true; }
145 else if (fastaFile == "not open") { abort = true; }
147 //if the user changes the output directory command factory will send this info to us in the output parameter
148 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
150 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it
154 //check for optional parameter and set defaults
155 // ...at some point should added some additional type checking...
157 temp = validParameter.validFile(parameters, "flip", false);
158 if (temp == "not found"){ flip = 0; }
159 else if(m->isTrue(temp)) { flip = 1; }
161 temp = validParameter.validFile(parameters, "oligos", true);
162 if (temp == "not found"){ oligoFile = ""; }
163 else if(temp == "not open"){ abort = true; }
164 else { oligoFile = temp; }
167 temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
168 convert(temp, maxAmbig);
170 temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "0"; }
171 convert(temp, maxHomoP);
173 temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "0"; }
174 convert(temp, minLength);
176 temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; }
177 convert(temp, maxLength);
179 temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
180 convert(temp, bdiffs);
182 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
183 convert(temp, pdiffs);
185 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs; temp = toString(tempTotal); }
186 convert(temp, tdiffs);
188 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs; }
190 temp = validParameter.validFile(parameters, "qfile", true);
191 if (temp == "not found") { qFileName = ""; }
192 else if(temp == "not open") { abort = true; }
193 else { qFileName = temp; }
195 temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; }
196 convert(temp, qThreshold);
198 temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "t"; }
199 qtrim = m->isTrue(temp);
201 temp = validParameter.validFile(parameters, "rollaverage", false); if (temp == "not found") { temp = "0"; }
202 convert(temp, qRollAverage);
204 temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
205 convert(temp, qWindowAverage);
207 temp = validParameter.validFile(parameters, "qwindowsize", false); if (temp == "not found") { temp = "50"; }
208 convert(temp, qWindowSize);
210 temp = validParameter.validFile(parameters, "qstepsize", false); if (temp == "not found") { temp = "1"; }
211 convert(temp, qWindowStep);
213 temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; }
214 convert(temp, qAverage);
216 temp = validParameter.validFile(parameters, "keepfirst", false); if (temp == "not found") { temp = "0"; }
217 convert(temp, keepFirst);
219 temp = validParameter.validFile(parameters, "removelast", false); if (temp == "not found") { temp = "0"; }
220 convert(temp, removeLast);
222 temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
223 allFiles = m->isTrue(temp);
225 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; }
226 convert(temp, processors);
229 if(allFiles && (oligoFile == "")){
230 m->mothurOut("You selected allfiles, but didn't enter an oligos. Ignoring the allfiles request."); m->mothurOutEndLine();
232 if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
233 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
237 if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){
238 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
244 catch(exception& e) {
245 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
250 //**********************************************************************************************************************
252 void TrimSeqsCommand::help(){
254 m->mothurOut("The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n");
255 m->mothurOut("The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n");
256 m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n");
257 m->mothurOut("The fasta parameter is required.\n");
258 m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n");
259 m->mothurOut("The oligos parameter allows you to provide an oligos file.\n");
260 m->mothurOut("The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n");
261 m->mothurOut("The maxhomop parameter allows you to set a maximum homopolymer length. \n");
262 m->mothurOut("The minlength parameter allows you to set and minimum sequence length. \n");
263 m->mothurOut("The maxlength parameter allows you to set and maximum sequence length. \n");
264 m->mothurOut("The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n");
265 m->mothurOut("The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n");
266 m->mothurOut("The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n");
267 m->mothurOut("The qfile parameter allows you to provide a quality file.\n");
268 m->mothurOut("The qthreshold parameter allows you to set a minimum quality score allowed. \n");
269 m->mothurOut("The qaverage parameter allows you to set a minimum average quality score allowed. \n");
270 m->mothurOut("The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n");
271 m->mothurOut("The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n");
272 m->mothurOut("The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n");
273 m->mothurOut("The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n");
274 m->mothurOut("The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n");
275 m->mothurOut("The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n");
276 m->mothurOut("The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n");
277 m->mothurOut("The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n");
278 m->mothurOut("The trim.seqs command should be in the following format: \n");
279 m->mothurOut("trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n");
280 m->mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n");
281 m->mothurOut("Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n");
282 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
283 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
286 catch(exception& e) {
287 m->errorOut(e, "TrimSeqsCommand", "help");
293 //***************************************************************************************************************
295 TrimSeqsCommand::~TrimSeqsCommand(){ /* do nothing */ }
297 //***************************************************************************************************************
299 int TrimSeqsCommand::execute(){
302 if (abort == true) { if (calledHelp) { return 0; } return 2; }
304 numFPrimers = 0; //this needs to be initialized
306 vector<vector<string> > fastaFileNames;
307 vector<vector<string> > qualFileNames;
309 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
310 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
312 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
313 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
315 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
316 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
317 if (qFileName != "") {
318 outputNames.push_back(trimQualFile);
319 outputNames.push_back(scrapQualFile);
320 outputTypes["qfile"].push_back(trimQualFile);
321 outputTypes["qfile"].push_back(scrapQualFile);
324 string outputGroupFileName;
326 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
327 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
328 getOligos(fastaFileNames, qualFileNames);
331 vector<unsigned long int> fastaFilePos;
332 vector<unsigned long int> qFilePos;
334 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
336 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
337 lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
338 if (qFileName != "") { qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)])); }
340 if(qFileName == "") { qLines = lines; } //files with duds
342 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
344 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames, lines[0], qLines[0]);
346 createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames);
349 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames, lines[0], qLines[0]);
352 if (m->control_pressed) { return 0; }
355 map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
356 map<string, string>::iterator it;
357 set<string> namesToRemove;
358 for(int i=0;i<fastaFileNames.size();i++){
359 for(int j=0;j<fastaFileNames[0].size();j++){
360 if (fastaFileNames[i][j] != "") {
361 if(m->isBlank(fastaFileNames[i][j])){
362 remove(fastaFileNames[i][j].c_str());
363 namesToRemove.insert(fastaFileNames[i][j]);
366 remove(qualFileNames[i][j].c_str());
367 namesToRemove.insert(qualFileNames[i][j]);
370 it = uniqueFastaNames.find(fastaFileNames[i][j]);
371 if (it == uniqueFastaNames.end()) {
372 uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];
379 //remove names for outputFileNames, just cleans up the output
380 vector<string> outputNames2;
381 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
382 outputNames = outputNames2;
384 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
386 m->openInputFile(it->first, in);
389 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
390 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
391 m->openOutputFile(thisGroupName, out);
394 if (m->control_pressed) { break; }
396 Sequence currSeq(in); m->gobble(in);
397 out << currSeq.getName() << '\t' << it->second << endl;
404 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
406 //output group counts
407 m->mothurOutEndLine();
409 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
410 total += it->second; m->mothurOut("Group " + it->first + " contains " + toString(it->second) + " sequences."); m->mothurOutEndLine();
412 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
414 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
416 //set fasta file as new current fastafile
418 itTypes = outputTypes.find("fasta");
419 if (itTypes != outputTypes.end()) {
420 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
423 itTypes = outputTypes.find("qfile");
424 if (itTypes != outputTypes.end()) {
425 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
428 itTypes = outputTypes.find("group");
429 if (itTypes != outputTypes.end()) {
430 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
433 m->mothurOutEndLine();
434 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
435 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
436 m->mothurOutEndLine();
441 catch(exception& e) {
442 m->errorOut(e, "TrimSeqsCommand", "execute");
447 /**************************************************************************************/
449 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, linePair* line, linePair* qline) {
453 ofstream trimFASTAFile;
454 m->openOutputFile(trimFileName, trimFASTAFile);
456 ofstream scrapFASTAFile;
457 m->openOutputFile(scrapFileName, scrapFASTAFile);
459 ofstream trimQualFile;
460 ofstream scrapQualFile;
462 m->openOutputFile(trimQFileName, trimQualFile);
463 m->openOutputFile(scrapQFileName, scrapQualFile);
466 ofstream outGroupsFile;
467 if (oligoFile != ""){ m->openOutputFile(groupFileName, outGroupsFile); }
469 for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
470 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
471 if (fastaFileNames[i][j] != "") {
473 m->openOutputFile(fastaFileNames[i][j], temp); temp.close();
475 m->openOutputFile(qualFileNames[i][j], temp); temp.close();
483 m->openInputFile(filename, inFASTA);
484 inFASTA.seekg(line->start);
487 if(qFileName != "") {
488 m->openInputFile(qFileName, qFile);
489 qFile.seekg(qline->start);
497 if (m->control_pressed) {
498 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
499 if (oligoFile != "") { outGroupsFile.close(); }
504 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
510 string trashCode = "";
511 int currentSeqsDiffs = 0;
513 Sequence currSeq(inFASTA); m->gobble(inFASTA);
515 QualityScores currQual;
517 currQual = QualityScores(qFile); m->gobble(qFile);
520 string origSeq = currSeq.getUnaligned();
523 int barcodeIndex = 0;
526 if(barcodes.size() != 0){
527 success = stripBarcode(currSeq, currQual, barcodeIndex);
528 if(success > bdiffs) { trashCode += 'b'; }
529 else{ currentSeqsDiffs += success; }
532 if(numFPrimers != 0){
533 success = stripForward(currSeq, currQual, primerIndex);
534 if(success > pdiffs) { trashCode += 'f'; }
535 else{ currentSeqsDiffs += success; }
538 if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
540 if(numRPrimers != 0){
541 success = stripReverse(currSeq, currQual);
542 if(!success) { trashCode += 'r'; }
546 success = keepFirstTrim(currSeq, currQual);
550 success = removeLastTrim(currSeq, currQual);
551 if(!success) { trashCode += 'l'; }
556 int origLength = currSeq.getNumBases();
558 if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); }
559 else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); }
560 else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); }
561 else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); }
562 else { success = 1; }
564 //you don't want to trim, if it fails above then scrap it
565 if ((!qtrim) && (origLength != currSeq.getNumBases())) { success = 0; }
567 if(!success) { trashCode += 'q'; }
570 if(minLength > 0 || maxLength > 0){
571 success = cullLength(currSeq);
572 if(!success) { trashCode += 'l'; }
575 success = cullHomoP(currSeq);
576 if(!success) { trashCode += 'h'; }
579 success = cullAmbigs(currSeq);
580 if(!success) { trashCode += 'n'; }
583 if(flip){ // should go last
584 currSeq.reverseComplement();
586 currQual.flipQScores();
590 if(trashCode.length() == 0){
591 currSeq.setAligned(currSeq.getUnaligned());
592 currSeq.printSequence(trimFASTAFile);
595 currQual.printQScores(trimQualFile);
598 if(barcodes.size() != 0){
599 string thisGroup = barcodeNameVector[barcodeIndex];
600 if (primers.size() != 0) { thisGroup += "." + primerNameVector[primerIndex]; }
602 outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
604 map<string, int>::iterator it = groupCounts.find(thisGroup);
605 if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; }
606 else { groupCounts[it->first]++; }
613 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
614 currSeq.printSequence(output);
618 m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
619 currQual.printQScores(output);
625 currSeq.setName(currSeq.getName() + '|' + trashCode);
626 currSeq.setUnaligned(origSeq);
627 currSeq.setAligned(origSeq);
628 currSeq.printSequence(scrapFASTAFile);
630 currQual.printQScores(scrapQualFile);
636 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
637 unsigned long int pos = inFASTA.tellg();
638 if ((pos == -1) || (pos >= line->end)) { break; }
640 if (inFASTA.eof()) { break; }
644 if((count) % 1000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
648 if((count) % 1000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
652 trimFASTAFile.close();
653 scrapFASTAFile.close();
654 if (oligoFile != "") { outGroupsFile.close(); }
655 if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
659 catch(exception& e) {
660 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
665 /**************************************************************************************************/
667 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames) {
669 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
674 //loop through and create all the processes you want
675 while (process != processors) {
679 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
683 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
684 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
689 for(int i=0;i<tempFASTAFileNames.size();i++){
690 for(int j=0;j<tempFASTAFileNames[i].size();j++){
691 if (tempFASTAFileNames[i][j] != "") {
692 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
693 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
696 tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
697 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
704 driverCreateTrim(filename,
706 (trimFASTAFileName + toString(getpid()) + ".temp"),
707 (scrapFASTAFileName + toString(getpid()) + ".temp"),
708 (trimQualFileName + toString(getpid()) + ".temp"),
709 (scrapQualFileName + toString(getpid()) + ".temp"),
710 (groupFile + toString(getpid()) + ".temp"),
712 tempPrimerQualFileNames,
716 //pass groupCounts to parent
718 string tempFile = filename + toString(getpid()) + ".num.temp";
719 m->openOutputFile(tempFile, out);
720 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
721 out << it->first << '\t' << it->second << endl;
727 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
728 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
735 m->openOutputFile(trimFASTAFileName, temp); temp.close();
736 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
737 m->openOutputFile(trimQualFileName, temp); temp.close();
738 m->openOutputFile(scrapQualFileName, temp); temp.close();
740 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
742 //force parent to wait until all the processes are done
743 for (int i=0;i<processIDS.size();i++) {
744 int temp = processIDS[i];
749 for(int i=0;i<processIDS.size();i++){
751 m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
753 m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
754 remove((trimFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
755 m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
756 remove((scrapFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
759 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
760 remove((trimQualFileName + toString(processIDS[i]) + ".temp").c_str());
761 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
762 remove((scrapQualFileName + toString(processIDS[i]) + ".temp").c_str());
765 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
766 remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
770 for(int j=0;j<fastaFileNames.size();j++){
771 for(int k=0;k<fastaFileNames[j].size();k++){
772 if (fastaFileNames[j][k] != "") {
773 m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
774 remove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
777 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
778 remove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
786 string tempFile = filename + toString(processIDS[i]) + ".num.temp";
787 m->openInputFile(tempFile, in);
791 in >> group >> tempNum; m->gobble(in);
793 map<string, int>::iterator it = groupCounts.find(group);
794 if (it == groupCounts.end()) { groupCounts[group] = tempNum; }
795 else { groupCounts[it->first] += tempNum; }
797 in.close(); remove(tempFile.c_str());
804 catch(exception& e) {
805 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
810 /**************************************************************************************************/
812 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
815 //set file positions for fasta file
816 fastaFilePos = m->divideFile(filename, processors);
818 if (qfilename == "") { return processors; }
820 //get name of first sequence in each chunk
821 map<string, int> firstSeqNames;
822 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
824 m->openInputFile(filename, in);
825 in.seekg(fastaFilePos[i]);
828 firstSeqNames[temp.getName()] = i;
833 //seach for filePos of each first name in the qfile and save in qfileFilePos
835 m->openInputFile(qfilename, inQual);
838 while(!inQual.eof()){
839 input = m->getline(inQual);
841 if (input.length() != 0) {
842 if(input[0] == '>'){ //this is a sequence name line
843 istringstream nameStream(input);
845 string sname = ""; nameStream >> sname;
846 sname = sname.substr(1);
848 map<string, int>::iterator it = firstSeqNames.find(sname);
850 if(it != firstSeqNames.end()) { //this is the start of a new chunk
851 unsigned long int pos = inQual.tellg();
852 qfileFilePos.push_back(pos - input.length() - 1);
853 firstSeqNames.erase(it);
858 if (firstSeqNames.size() == 0) { break; }
863 if (firstSeqNames.size() != 0) {
864 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
865 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
871 //get last file position of qfile
873 unsigned long int size;
875 //get num bytes in file
876 pFile = fopen (qfilename.c_str(),"rb");
877 if (pFile==NULL) perror ("Error opening file");
879 fseek (pFile, 0, SEEK_END);
884 qfileFilePos.push_back(size);
888 catch(exception& e) {
889 m->errorOut(e, "TrimSeqsCommand", "setLines");
894 //***************************************************************************************************************
896 void TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames){
899 m->openInputFile(oligoFile, inOligos);
903 string type, oligo, group;
906 int indexBarcode = 0;
908 while(!inOligos.eof()){
910 inOligos >> type; m->gobble(inOligos);
913 while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
916 //make type case insensitive
917 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
921 for(int i=0;i<oligo.length();i++){
922 oligo[i] = toupper(oligo[i]);
923 if(oligo[i] == 'U') { oligo[i] = 'T'; }
926 if(type == "FORWARD"){
929 // get rest of line in case there is a primer name
930 while (!inOligos.eof()) {
931 char c = inOligos.get();
932 if (c == 10 || c == 13){ break; }
933 else if (c == 32 || c == 9){;} //space or tab
937 //check for repeat barcodes
938 map<string, int>::iterator itPrime = primers.find(oligo);
939 if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
941 primers[oligo]=indexPrimer; indexPrimer++;
942 primerNameVector.push_back(group);
944 else if(type == "REVERSE"){
945 Sequence oligoRC("reverse", oligo);
946 oligoRC.reverseComplement();
947 revPrimer.push_back(oligoRC.getUnaligned());
949 else if(type == "BARCODE"){
952 //check for repeat barcodes
953 map<string, int>::iterator itBar = barcodes.find(oligo);
954 if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
956 barcodes[oligo]=indexBarcode; indexBarcode++;
957 barcodeNameVector.push_back(group);
959 else{ m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
965 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; }
967 //add in potential combos
968 if(barcodeNameVector.size() == 0){
970 barcodeNameVector.push_back("");
973 if(primerNameVector.size() == 0){
975 primerNameVector.push_back("");
978 fastaFileNames.resize(barcodeNameVector.size());
979 for(int i=0;i<fastaFileNames.size();i++){
980 fastaFileNames[i].assign(primerNameVector.size(), "");
982 if(qFileName != ""){ qualFileNames = fastaFileNames; }
985 set<string> uniqueNames; //used to cleanup outputFileNames
986 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
987 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
989 string primerName = primerNameVector[itPrimer->second];
990 string barcodeName = barcodeNameVector[itBar->second];
992 string comboGroupName = "";
993 string fastaFileName = "";
994 string qualFileName = "";
996 if(primerName == ""){
997 comboGroupName = barcodeNameVector[itBar->second];
1000 if(barcodeName == ""){
1001 comboGroupName = primerNameVector[itPrimer->second];
1004 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1009 fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1010 if (uniqueNames.count(fastaFileName) == 0) {
1011 outputNames.push_back(fastaFileName);
1012 outputTypes["fasta"].push_back(fastaFileName);
1013 uniqueNames.insert(fastaFileName);
1016 fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1017 m->openOutputFile(fastaFileName, temp); temp.close();
1019 if(qFileName != ""){
1020 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1021 if (uniqueNames.count(fastaFileName) == 0) {
1022 outputNames.push_back(qualFileName);
1023 outputTypes["qfile"].push_back(qualFileName);
1026 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1027 m->openOutputFile(qualFileName, temp); temp.close();
1032 numFPrimers = primers.size();
1033 numRPrimers = revPrimer.size();
1036 catch(exception& e) {
1037 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1042 //***************************************************************************************************************
1044 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
1047 string rawSequence = seq.getUnaligned();
1048 int success = bdiffs + 1; //guilty until proven innocent
1050 //can you find the barcode
1051 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1052 string oligo = it->first;
1053 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
1054 success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
1058 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1060 seq.setUnaligned(rawSequence.substr(oligo.length()));
1062 if(qual.getName() != ""){
1063 qual.trimQScores(oligo.length(), -1);
1071 //if you found the barcode or if you don't want to allow for diffs
1072 if ((bdiffs == 0) || (success == 0)) { return success; }
1074 else { //try aligning and see if you can find it
1078 Alignment* alignment;
1079 if (barcodes.size() > 0) {
1080 map<string,int>::iterator it=barcodes.begin();
1082 for(it;it!=barcodes.end();it++){
1083 if(it->first.length() > maxLength){
1084 maxLength = it->first.length();
1087 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));
1089 }else{ alignment = NULL; }
1091 //can you find the barcode
1097 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1098 string oligo = it->first;
1099 // int length = oligo.length();
1101 if(rawSequence.length() < maxLength){ //let's just assume that the barcodes are the same length
1102 success = bdiffs + 10;
1106 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1107 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1108 oligo = alignment->getSeqAAln();
1109 string temp = alignment->getSeqBAln();
1111 int alnLength = oligo.length();
1113 for(int i=oligo.length()-1;i>=0;i--){
1114 if(oligo[i] != '-'){ alnLength = i+1; break; }
1116 oligo = oligo.substr(0,alnLength);
1117 temp = temp.substr(0,alnLength);
1119 int numDiff = countDiffs(oligo, temp);
1121 if(numDiff < minDiff){
1124 minGroup = it->second;
1126 for(int i=0;i<alnLength;i++){
1132 else if(numDiff == minDiff){
1138 if(minDiff > bdiffs) { success = minDiff; } //no good matches
1139 else if(minCount > 1) { success = bdiffs + 100; } //can't tell the difference between multiple barcodes
1140 else{ //use the best match
1142 seq.setUnaligned(rawSequence.substr(minPos));
1144 if(qual.getName() != ""){
1145 qual.trimQScores(minPos, -1);
1150 if (alignment != NULL) { delete alignment; }
1157 catch(exception& e) {
1158 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
1164 //***************************************************************************************************************
1166 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
1168 string rawSequence = seq.getUnaligned();
1169 int success = pdiffs + 1; //guilty until proven innocent
1171 //can you find the primer
1172 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1173 string oligo = it->first;
1174 if(rawSequence.length() < oligo.length()){ //let's just assume that the primers are the same length
1175 success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
1179 if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1181 seq.setUnaligned(rawSequence.substr(oligo.length()));
1182 if(qual.getName() != ""){
1183 qual.trimQScores(oligo.length(), -1);
1190 //if you found the barcode or if you don't want to allow for diffs
1191 if ((pdiffs == 0) || (success == 0)) { return success; }
1193 else { //try aligning and see if you can find it
1197 Alignment* alignment;
1198 if (primers.size() > 0) {
1199 map<string,int>::iterator it=primers.begin();
1201 for(it;it!=primers.end();it++){
1202 if(it->first.length() > maxLength){
1203 maxLength = it->first.length();
1206 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));
1208 }else{ alignment = NULL; }
1210 //can you find the barcode
1216 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1217 string oligo = it->first;
1218 // int length = oligo.length();
1220 if(rawSequence.length() < maxLength){
1221 success = pdiffs + 100;
1225 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1226 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1227 oligo = alignment->getSeqAAln();
1228 string temp = alignment->getSeqBAln();
1230 int alnLength = oligo.length();
1232 for(int i=oligo.length()-1;i>=0;i--){
1233 if(oligo[i] != '-'){ alnLength = i+1; break; }
1235 oligo = oligo.substr(0,alnLength);
1236 temp = temp.substr(0,alnLength);
1238 int numDiff = countDiffs(oligo, temp);
1240 if(numDiff < minDiff){
1243 minGroup = it->second;
1245 for(int i=0;i<alnLength;i++){
1251 else if(numDiff == minDiff){
1257 if(minDiff > pdiffs) { success = minDiff; } //no good matches
1258 else if(minCount > 1) { success = pdiffs + 10; } //can't tell the difference between multiple primers
1259 else{ //use the best match
1261 seq.setUnaligned(rawSequence.substr(minPos));
1262 if(qual.getName() != ""){
1263 qual.trimQScores(minPos, -1);
1268 if (alignment != NULL) { delete alignment; }
1275 catch(exception& e) {
1276 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1281 //***************************************************************************************************************
1283 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1285 string rawSequence = seq.getUnaligned();
1286 bool success = 0; //guilty until proven innocent
1288 for(int i=0;i<numRPrimers;i++){
1289 string oligo = revPrimer[i];
1291 if(rawSequence.length() < oligo.length()){
1296 if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1297 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1298 if(qual.getName() != ""){
1299 qual.trimQScores(-1, rawSequence.length()-oligo.length());
1308 catch(exception& e) {
1309 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1314 //***************************************************************************************************************
1316 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1319 if(qscores.getName() != ""){
1320 qscores.trimQScores(-1, keepFirst);
1322 sequence.trim(keepFirst);
1325 catch(exception& e) {
1326 m->errorOut(e, "keepFirstTrim", "countDiffs");
1332 //***************************************************************************************************************
1334 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1338 int length = sequence.getNumBases() - removeLast;
1341 if(qscores.getName() != ""){
1342 qscores.trimQScores(-1, length);
1344 sequence.trim(length);
1353 catch(exception& e) {
1354 m->errorOut(e, "removeLastTrim", "countDiffs");
1360 //***************************************************************************************************************
1362 bool TrimSeqsCommand::cullLength(Sequence& seq){
1365 int length = seq.getNumBases();
1366 bool success = 0; //guilty until proven innocent
1368 if(length >= minLength && maxLength == 0) { success = 1; }
1369 else if(length >= minLength && length <= maxLength) { success = 1; }
1370 else { success = 0; }
1375 catch(exception& e) {
1376 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1382 //***************************************************************************************************************
1384 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1386 int longHomoP = seq.getLongHomoPolymer();
1387 bool success = 0; //guilty until proven innocent
1389 if(longHomoP <= maxHomoP){ success = 1; }
1390 else { success = 0; }
1394 catch(exception& e) {
1395 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1401 //***************************************************************************************************************
1403 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1405 int numNs = seq.getAmbigBases();
1406 bool success = 0; //guilty until proven innocent
1408 if(numNs <= maxAmbig) { success = 1; }
1409 else { success = 0; }
1413 catch(exception& e) {
1414 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1420 //***************************************************************************************************************
1422 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1425 int length = oligo.length();
1427 for(int i=0;i<length;i++){
1429 if(oligo[i] != seq[i]){
1430 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C') { success = 0; }
1431 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { success = 0; }
1432 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { success = 0; }
1433 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { success = 0; }
1434 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { success = 0; }
1435 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1436 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { success = 0; }
1437 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
1438 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1439 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
1440 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { success = 0; }
1441 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
1443 if(success == 0) { break; }
1452 catch(exception& e) {
1453 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1459 //***************************************************************************************************************
1461 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1464 int length = oligo.length();
1467 for(int i=0;i<length;i++){
1469 if(oligo[i] != seq[i]){
1470 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.') { countDiffs++; }
1471 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { countDiffs++; }
1472 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { countDiffs++; }
1473 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { countDiffs++; }
1474 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { countDiffs++; }
1475 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1476 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { countDiffs++; }
1477 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
1478 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1479 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
1480 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { countDiffs++; }
1481 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
1488 catch(exception& e) {
1489 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1495 //***************************************************************************************************************