5 * Created by Pat Schloss on 6/6/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
14 //**********************************************************************************************************************
15 vector<string> TrimSeqsCommand::setParameters(){
17 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
19 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
20 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
22 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
23 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
24 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
25 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
26 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
27 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
28 CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
29 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
30 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
31 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
32 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
33 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
34 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
35 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
36 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
37 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
38 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
39 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
40 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
41 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
42 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
43 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
44 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
46 vector<string> myArray;
47 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
51 m->errorOut(e, "TrimSeqsCommand", "setParameters");
55 //**********************************************************************************************************************
56 string TrimSeqsCommand::getHelpString(){
58 string helpString = "";
59 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
60 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
61 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
62 helpString += "The fasta parameter is required.\n";
63 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
64 helpString += "The oligos parameter allows you to provide an oligos file.\n";
65 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
66 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
67 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
68 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
69 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
70 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
71 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
72 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
73 helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
74 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
75 helpString += "The qfile parameter allows you to provide a quality file.\n";
76 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
77 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
78 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
79 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
80 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
81 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
82 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
83 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
84 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
85 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
86 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
87 helpString += "The trim.seqs command should be in the following format: \n";
88 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n";
89 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n";
90 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
91 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
92 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
96 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
102 //**********************************************************************************************************************
104 TrimSeqsCommand::TrimSeqsCommand(){
106 abort = true; calledHelp = true;
108 vector<string> tempOutNames;
109 outputTypes["fasta"] = tempOutNames;
110 outputTypes["qfile"] = tempOutNames;
111 outputTypes["group"] = tempOutNames;
112 outputTypes["name"] = tempOutNames;
114 catch(exception& e) {
115 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
119 //***************************************************************************************************************
121 TrimSeqsCommand::TrimSeqsCommand(string option) {
124 abort = false; calledHelp = false;
127 //allow user to run help
128 if(option == "help") { help(); abort = true; calledHelp = true; }
129 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
132 vector<string> myArray = setParameters();
134 OptionParser parser(option);
135 map<string,string> parameters = parser.getParameters();
137 ValidParameters validParameter;
138 map<string,string>::iterator it;
140 //check to make sure all parameters are valid for command
141 for (it = parameters.begin(); it != parameters.end(); it++) {
142 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
145 //initialize outputTypes
146 vector<string> tempOutNames;
147 outputTypes["fasta"] = tempOutNames;
148 outputTypes["qfile"] = tempOutNames;
149 outputTypes["group"] = tempOutNames;
150 outputTypes["name"] = tempOutNames;
152 //if the user changes the input directory command factory will send this info to us in the output parameter
153 string inputDir = validParameter.validFile(parameters, "inputdir", false);
154 if (inputDir == "not found"){ inputDir = ""; }
157 it = parameters.find("fasta");
158 //user has given a template file
159 if(it != parameters.end()){
160 path = m->hasPath(it->second);
161 //if the user has not given a path then, add inputdir. else leave path alone.
162 if (path == "") { parameters["fasta"] = inputDir + it->second; }
165 it = parameters.find("oligos");
166 //user has given a template file
167 if(it != parameters.end()){
168 path = m->hasPath(it->second);
169 //if the user has not given a path then, add inputdir. else leave path alone.
170 if (path == "") { parameters["oligos"] = inputDir + it->second; }
173 it = parameters.find("qfile");
174 //user has given a template file
175 if(it != parameters.end()){
176 path = m->hasPath(it->second);
177 //if the user has not given a path then, add inputdir. else leave path alone.
178 if (path == "") { parameters["qfile"] = inputDir + it->second; }
181 it = parameters.find("name");
182 //user has given a template file
183 if(it != parameters.end()){
184 path = m->hasPath(it->second);
185 //if the user has not given a path then, add inputdir. else leave path alone.
186 if (path == "") { parameters["name"] = inputDir + it->second; }
192 //check for required parameters
193 fastaFile = validParameter.validFile(parameters, "fasta", true);
194 if (fastaFile == "not found") {
195 fastaFile = m->getFastaFile();
196 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
197 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
198 }else if (fastaFile == "not open") { abort = true; }
199 else { m->setFastaFile(fastaFile); }
201 //if the user changes the output directory command factory will send this info to us in the output parameter
202 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
204 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it
208 //check for optional parameter and set defaults
209 // ...at some point should added some additional type checking...
211 temp = validParameter.validFile(parameters, "flip", false);
212 if (temp == "not found") { flip = 0; }
213 else { flip = m->isTrue(temp); }
215 temp = validParameter.validFile(parameters, "oligos", true);
216 if (temp == "not found"){ oligoFile = ""; }
217 else if(temp == "not open"){ abort = true; }
218 else { oligoFile = temp; m->setOligosFile(oligoFile); }
221 temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
222 m->mothurConvert(temp, maxAmbig);
224 temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "0"; }
225 m->mothurConvert(temp, maxHomoP);
227 temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "0"; }
228 m->mothurConvert(temp, minLength);
230 temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; }
231 m->mothurConvert(temp, maxLength);
233 temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
234 m->mothurConvert(temp, bdiffs);
236 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
237 m->mothurConvert(temp, pdiffs);
239 temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
240 m->mothurConvert(temp, ldiffs);
242 temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
243 m->mothurConvert(temp, sdiffs);
245 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); }
246 m->mothurConvert(temp, tdiffs);
248 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
250 temp = validParameter.validFile(parameters, "qfile", true);
251 if (temp == "not found") { qFileName = ""; }
252 else if(temp == "not open") { abort = true; }
253 else { qFileName = temp; m->setQualFile(qFileName); }
255 temp = validParameter.validFile(parameters, "name", true);
256 if (temp == "not found") { nameFile = ""; }
257 else if(temp == "not open") { nameFile = ""; abort = true; }
258 else { nameFile = temp; m->setNameFile(nameFile); }
260 temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; }
261 m->mothurConvert(temp, qThreshold);
263 temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "t"; }
264 qtrim = m->isTrue(temp);
266 temp = validParameter.validFile(parameters, "rollaverage", false); if (temp == "not found") { temp = "0"; }
267 convert(temp, qRollAverage);
269 temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
270 convert(temp, qWindowAverage);
272 temp = validParameter.validFile(parameters, "qwindowsize", false); if (temp == "not found") { temp = "50"; }
273 convert(temp, qWindowSize);
275 temp = validParameter.validFile(parameters, "qstepsize", false); if (temp == "not found") { temp = "1"; }
276 convert(temp, qWindowStep);
278 temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; }
279 convert(temp, qAverage);
281 temp = validParameter.validFile(parameters, "keepfirst", false); if (temp == "not found") { temp = "0"; }
282 convert(temp, keepFirst);
284 temp = validParameter.validFile(parameters, "removelast", false); if (temp == "not found") { temp = "0"; }
285 convert(temp, removeLast);
287 temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
288 allFiles = m->isTrue(temp);
290 temp = validParameter.validFile(parameters, "keepforward", false); if (temp == "not found") { temp = "F"; }
291 keepforward = m->isTrue(temp);
293 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
294 m->setProcessors(temp);
295 m->mothurConvert(temp, processors);
298 if(allFiles && (oligoFile == "")){
299 m->mothurOut("You selected allfiles, but didn't enter an oligos. Ignoring the allfiles request."); m->mothurOutEndLine();
301 if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
302 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
306 if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){
307 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
311 if (nameFile == "") {
312 vector<string> files; files.push_back(fastaFile);
313 parser.getNameFile(files);
318 catch(exception& e) {
319 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
323 //***************************************************************************************************************
325 int TrimSeqsCommand::execute(){
328 if (abort == true) { if (calledHelp) { return 0; } return 2; }
330 numFPrimers = 0; //this needs to be initialized
333 vector<vector<string> > fastaFileNames;
334 vector<vector<string> > qualFileNames;
335 vector<vector<string> > nameFileNames;
337 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
338 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
340 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
341 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
343 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
344 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
346 if (qFileName != "") {
347 outputNames.push_back(trimQualFile);
348 outputNames.push_back(scrapQualFile);
349 outputTypes["qfile"].push_back(trimQualFile);
350 outputTypes["qfile"].push_back(scrapQualFile);
353 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
354 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
356 if (nameFile != "") {
357 m->readNames(nameFile, nameMap);
358 outputNames.push_back(trimNameFile);
359 outputNames.push_back(scrapNameFile);
360 outputTypes["name"].push_back(trimNameFile);
361 outputTypes["name"].push_back(scrapNameFile);
364 if (m->control_pressed) { return 0; }
366 string outputGroupFileName;
368 createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
370 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
371 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
375 vector<unsigned long long> fastaFilePos;
376 vector<unsigned long long> qFilePos;
378 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
380 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
381 lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
382 if (qFileName != "") { qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)])); }
384 if(qFileName == "") { qLines = lines; } //files with duds
386 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
388 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
390 createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames);
393 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
396 if (m->control_pressed) { return 0; }
399 map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
400 map<string, string>::iterator it;
401 set<string> namesToRemove;
402 for(int i=0;i<fastaFileNames.size();i++){
403 for(int j=0;j<fastaFileNames[0].size();j++){
404 if (fastaFileNames[i][j] != "") {
405 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
406 if(m->isBlank(fastaFileNames[i][j])){
407 m->mothurRemove(fastaFileNames[i][j]);
408 namesToRemove.insert(fastaFileNames[i][j]);
411 m->mothurRemove(qualFileNames[i][j]);
412 namesToRemove.insert(qualFileNames[i][j]);
416 m->mothurRemove(nameFileNames[i][j]);
417 namesToRemove.insert(nameFileNames[i][j]);
420 it = uniqueFastaNames.find(fastaFileNames[i][j]);
421 if (it == uniqueFastaNames.end()) {
422 uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];
430 //remove names for outputFileNames, just cleans up the output
431 vector<string> outputNames2;
432 for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
433 outputNames = outputNames2;
435 for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
437 m->openInputFile(it->first, in);
440 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
441 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
442 m->openOutputFile(thisGroupName, out);
445 if (m->control_pressed) { break; }
447 Sequence currSeq(in); m->gobble(in);
448 out << currSeq.getName() << '\t' << it->second << endl;
455 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
457 //output group counts
458 m->mothurOutEndLine();
460 if (groupCounts.size() != 0) { m->mothurOut("Group count: \n"); }
461 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
462 total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine();
464 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
466 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
468 //set fasta file as new current fastafile
470 itTypes = outputTypes.find("fasta");
471 if (itTypes != outputTypes.end()) {
472 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
475 itTypes = outputTypes.find("name");
476 if (itTypes != outputTypes.end()) {
477 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
480 itTypes = outputTypes.find("qfile");
481 if (itTypes != outputTypes.end()) {
482 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
485 itTypes = outputTypes.find("group");
486 if (itTypes != outputTypes.end()) {
487 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
490 m->mothurOutEndLine();
491 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
492 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
493 m->mothurOutEndLine();
498 catch(exception& e) {
499 m->errorOut(e, "TrimSeqsCommand", "execute");
504 /**************************************************************************************/
506 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair* line, linePair* qline) {
510 ofstream trimFASTAFile;
511 m->openOutputFile(trimFileName, trimFASTAFile);
513 ofstream scrapFASTAFile;
514 m->openOutputFile(scrapFileName, scrapFASTAFile);
516 ofstream trimQualFile;
517 ofstream scrapQualFile;
519 m->openOutputFile(trimQFileName, trimQualFile);
520 m->openOutputFile(scrapQFileName, scrapQualFile);
523 ofstream trimNameFile;
524 ofstream scrapNameFile;
526 m->openOutputFile(trimNFileName, trimNameFile);
527 m->openOutputFile(scrapNFileName, scrapNameFile);
531 ofstream outGroupsFile;
532 if (createGroup){ m->openOutputFile(groupFileName, outGroupsFile); }
534 for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
535 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
536 if (fastaFileNames[i][j] != "") {
538 m->openOutputFile(fastaFileNames[i][j], temp); temp.close();
540 m->openOutputFile(qualFileNames[i][j], temp); temp.close();
544 m->openOutputFile(nameFileNames[i][j], temp); temp.close();
552 m->openInputFile(filename, inFASTA);
553 inFASTA.seekg(line->start);
556 if(qFileName != "") {
557 m->openInputFile(qFileName, qFile);
558 qFile.seekg(qline->start);
563 TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
567 if (m->control_pressed) {
568 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
569 if (createGroup) { outGroupsFile.close(); }
574 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
580 string trashCode = "";
581 int currentSeqsDiffs = 0;
583 Sequence currSeq(inFASTA); m->gobble(inFASTA);
584 //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
585 QualityScores currQual;
587 currQual = QualityScores(qFile); m->gobble(qFile);
590 string origSeq = currSeq.getUnaligned();
593 int barcodeIndex = 0;
597 success = trimOligos.stripLinker(currSeq, currQual);
598 if(!success) { trashCode += 'k'; }
601 if(barcodes.size() != 0){
602 success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
603 if(success > bdiffs) { trashCode += 'b'; }
604 else{ currentSeqsDiffs += success; }
608 success = trimOligos.stripSpacer(currSeq, currQual);
609 if(!success) { trashCode += 's'; }
612 if(numFPrimers != 0){
613 success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
614 if(success > pdiffs) { trashCode += 'f'; }
615 else{ currentSeqsDiffs += success; }
618 if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
620 if(numRPrimers != 0){
621 success = trimOligos.stripReverse(currSeq, currQual);
622 if(!success) { trashCode += 'r'; }
626 success = keepFirstTrim(currSeq, currQual);
630 success = removeLastTrim(currSeq, currQual);
631 if(!success) { trashCode += 'l'; }
636 int origLength = currSeq.getNumBases();
638 if(qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, qThreshold); }
639 else if(qAverage != 0) { success = currQual.cullQualAverage(currSeq, qAverage); }
640 else if(qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, qRollAverage); }
641 else if(qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage); }
642 else { success = 1; }
644 //you don't want to trim, if it fails above then scrap it
645 if ((!qtrim) && (origLength != currSeq.getNumBases())) { success = 0; }
647 if(!success) { trashCode += 'q'; }
650 if(minLength > 0 || maxLength > 0){
651 success = cullLength(currSeq);
652 if(!success) { trashCode += 'l'; }
655 success = cullHomoP(currSeq);
656 if(!success) { trashCode += 'h'; }
659 success = cullAmbigs(currSeq);
660 if(!success) { trashCode += 'n'; }
663 if(flip){ // should go last
664 currSeq.reverseComplement();
666 currQual.flipQScores();
670 if(trashCode.length() == 0){
671 currSeq.setAligned(currSeq.getUnaligned());
672 currSeq.printSequence(trimFASTAFile);
675 currQual.printQScores(trimQualFile);
679 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
680 if (itName != nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; }
681 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
685 if(barcodes.size() != 0){
686 string thisGroup = barcodeNameVector[barcodeIndex];
687 if (primers.size() != 0) {
688 if (primerNameVector[primerIndex] != "") {
689 if(thisGroup != "") {
690 thisGroup += "." + primerNameVector[primerIndex];
692 thisGroup = primerNameVector[primerIndex];
697 outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
699 if (nameFile != "") {
700 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
701 if (itName != nameMap.end()) {
702 vector<string> thisSeqsNames;
703 m->splitAtChar(itName->second, thisSeqsNames, ',');
704 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
705 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
707 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
710 map<string, int>::iterator it = groupCounts.find(thisGroup);
711 if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; }
712 else { groupCounts[it->first]++; }
719 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
720 currSeq.printSequence(output);
724 m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
725 currQual.printQScores(output);
730 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
731 if (itName != nameMap.end()) {
732 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
733 output << itName->first << '\t' << itName->second << endl;
735 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
740 if(nameFile != ""){ //needs to be before the currSeq name is changed
741 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
742 if (itName != nameMap.end()) { scrapNameFile << itName->first << '\t' << itName->second << endl; }
743 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
745 currSeq.setName(currSeq.getName() + '|' + trashCode);
746 currSeq.setUnaligned(origSeq);
747 currSeq.setAligned(origSeq);
748 currSeq.printSequence(scrapFASTAFile);
750 currQual.printQScores(scrapQualFile);
756 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
757 unsigned long long pos = inFASTA.tellg();
758 if ((pos == -1) || (pos >= line->end)) { break; }
761 if (inFASTA.eof()) { break; }
765 if((count) % 1000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
769 if((count) % 1000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
773 trimFASTAFile.close();
774 scrapFASTAFile.close();
775 if (createGroup) { outGroupsFile.close(); }
776 if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
777 if(nameFile != "") { scrapNameFile.close(); trimNameFile.close(); }
781 catch(exception& e) {
782 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
787 /**************************************************************************************************/
789 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
791 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
796 //loop through and create all the processes you want
797 while (process != processors) {
801 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
805 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
806 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
807 vector<vector<string> > tempNameFileNames = nameFileNames;
812 for(int i=0;i<tempFASTAFileNames.size();i++){
813 for(int j=0;j<tempFASTAFileNames[i].size();j++){
814 if (tempFASTAFileNames[i][j] != "") {
815 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
816 m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close();
819 tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
820 m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close();
823 tempNameFileNames[i][j] += toString(getpid()) + ".temp";
824 m->openOutputFile(tempNameFileNames[i][j], temp); temp.close();
831 driverCreateTrim(filename,
833 (trimFASTAFileName + toString(getpid()) + ".temp"),
834 (scrapFASTAFileName + toString(getpid()) + ".temp"),
835 (trimQualFileName + toString(getpid()) + ".temp"),
836 (scrapQualFileName + toString(getpid()) + ".temp"),
837 (trimNameFileName + toString(getpid()) + ".temp"),
838 (scrapNameFileName + toString(getpid()) + ".temp"),
839 (groupFile + toString(getpid()) + ".temp"),
841 tempPrimerQualFileNames,
846 //pass groupCounts to parent
849 string tempFile = filename + toString(getpid()) + ".num.temp";
850 m->openOutputFile(tempFile, out);
852 out << groupCounts.size() << endl;
854 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
855 out << it->first << '\t' << it->second << endl;
861 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
862 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
869 m->openOutputFile(trimFASTAFileName, temp); temp.close();
870 m->openOutputFile(scrapFASTAFileName, temp); temp.close();
872 m->openOutputFile(trimQualFileName, temp); temp.close();
873 m->openOutputFile(scrapQualFileName, temp); temp.close();
875 if (nameFile != "") {
876 m->openOutputFile(trimNameFileName, temp); temp.close();
877 m->openOutputFile(scrapNameFileName, temp); temp.close();
880 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
882 //force parent to wait until all the processes are done
883 for (int i=0;i<processIDS.size();i++) {
884 int temp = processIDS[i];
889 for(int i=0;i<processIDS.size();i++){
891 m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
893 m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
894 m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
895 m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
896 m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
899 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
900 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
901 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
902 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
906 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
907 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
908 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
909 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
913 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
914 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
919 for(int j=0;j<fastaFileNames.size();j++){
920 for(int k=0;k<fastaFileNames[j].size();k++){
921 if (fastaFileNames[j][k] != "") {
922 m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
923 m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
926 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
927 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
931 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
932 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
941 string tempFile = filename + toString(processIDS[i]) + ".num.temp";
942 m->openInputFile(tempFile, in);
946 in >> tempNum; m->gobble(in);
950 in >> group >> tempNum; m->gobble(in);
952 map<string, int>::iterator it = groupCounts.find(group);
953 if (it == groupCounts.end()) { groupCounts[group] = tempNum; }
954 else { groupCounts[it->first] += tempNum; }
957 in.close(); m->mothurRemove(tempFile);
965 catch(exception& e) {
966 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
971 /**************************************************************************************************/
973 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long long>& fastaFilePos, vector<unsigned long long>& qfileFilePos) {
975 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
976 //set file positions for fasta file
977 fastaFilePos = m->divideFile(filename, processors);
979 if (qfilename == "") { return processors; }
981 //get name of first sequence in each chunk
982 map<string, int> firstSeqNames;
983 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
985 m->openInputFile(filename, in);
986 in.seekg(fastaFilePos[i]);
989 firstSeqNames[temp.getName()] = i;
994 //seach for filePos of each first name in the qfile and save in qfileFilePos
996 m->openInputFile(qfilename, inQual);
999 while(!inQual.eof()){
1000 input = m->getline(inQual);
1002 if (input.length() != 0) {
1003 if(input[0] == '>'){ //this is a sequence name line
1004 istringstream nameStream(input);
1006 string sname = ""; nameStream >> sname;
1007 sname = sname.substr(1);
1009 map<string, int>::iterator it = firstSeqNames.find(sname);
1011 if(it != firstSeqNames.end()) { //this is the start of a new chunk
1012 unsigned long long pos = inQual.tellg();
1013 qfileFilePos.push_back(pos - input.length() - 1);
1014 firstSeqNames.erase(it);
1019 if (firstSeqNames.size() == 0) { break; }
1024 if (firstSeqNames.size() != 0) {
1025 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1026 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1032 //get last file position of qfile
1034 unsigned long long size;
1036 //get num bytes in file
1037 pFile = fopen (qfilename.c_str(),"rb");
1038 if (pFile==NULL) perror ("Error opening file");
1040 fseek (pFile, 0, SEEK_END);
1045 qfileFilePos.push_back(size);
1051 fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1052 fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1057 catch(exception& e) {
1058 m->errorOut(e, "TrimSeqsCommand", "setLines");
1063 //***************************************************************************************************************
1065 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1068 m->openInputFile(oligoFile, inOligos);
1072 string type, oligo, group;
1074 int indexPrimer = 0;
1075 int indexBarcode = 0;
1077 while(!inOligos.eof()){
1082 while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
1083 m->gobble(inOligos);
1086 m->gobble(inOligos);
1087 //make type case insensitive
1088 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
1092 for(int i=0;i<oligo.length();i++){
1093 oligo[i] = toupper(oligo[i]);
1094 if(oligo[i] == 'U') { oligo[i] = 'T'; }
1097 if(type == "FORWARD"){
1100 // get rest of line in case there is a primer name
1101 while (!inOligos.eof()) {
1102 char c = inOligos.get();
1103 if (c == 10 || c == 13){ break; }
1104 else if (c == 32 || c == 9){;} //space or tab
1105 else { group += c; }
1108 //check for repeat barcodes
1109 map<string, int>::iterator itPrime = primers.find(oligo);
1110 if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1112 primers[oligo]=indexPrimer; indexPrimer++;
1113 primerNameVector.push_back(group);
1115 else if(type == "REVERSE"){
1116 Sequence oligoRC("reverse", oligo);
1117 oligoRC.reverseComplement();
1118 revPrimer.push_back(oligoRC.getUnaligned());
1120 else if(type == "BARCODE"){
1123 //check for repeat barcodes
1124 map<string, int>::iterator itBar = barcodes.find(oligo);
1125 if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1127 barcodes[oligo]=indexBarcode; indexBarcode++;
1128 barcodeNameVector.push_back(group);
1129 }else if(type == "LINKER"){
1130 linker.push_back(oligo);
1131 }else if(type == "SPACER"){
1132 spacer.push_back(oligo);
1134 else{ m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1136 m->gobble(inOligos);
1140 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; }
1142 //add in potential combos
1143 if(barcodeNameVector.size() == 0){
1145 barcodeNameVector.push_back("");
1148 if(primerNameVector.size() == 0){
1150 primerNameVector.push_back("");
1153 fastaFileNames.resize(barcodeNameVector.size());
1154 for(int i=0;i<fastaFileNames.size();i++){
1155 fastaFileNames[i].assign(primerNameVector.size(), "");
1157 if(qFileName != "") { qualFileNames = fastaFileNames; }
1158 if(nameFile != "") { nameFileNames = fastaFileNames; }
1161 set<string> uniqueNames; //used to cleanup outputFileNames
1162 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1163 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1165 string primerName = primerNameVector[itPrimer->second];
1166 string barcodeName = barcodeNameVector[itBar->second];
1168 string comboGroupName = "";
1169 string fastaFileName = "";
1170 string qualFileName = "";
1171 string nameFileName = "";
1173 if(primerName == ""){
1174 comboGroupName = barcodeNameVector[itBar->second];
1177 if(barcodeName == ""){
1178 comboGroupName = primerNameVector[itPrimer->second];
1181 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1187 fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1188 if (uniqueNames.count(fastaFileName) == 0) {
1189 outputNames.push_back(fastaFileName);
1190 outputTypes["fasta"].push_back(fastaFileName);
1191 uniqueNames.insert(fastaFileName);
1194 fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1195 m->openOutputFile(fastaFileName, temp); temp.close();
1197 if(qFileName != ""){
1198 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1199 if (uniqueNames.count(qualFileName) == 0) {
1200 outputNames.push_back(qualFileName);
1201 outputTypes["qfile"].push_back(qualFileName);
1204 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1205 m->openOutputFile(qualFileName, temp); temp.close();
1209 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1210 if (uniqueNames.count(nameFileName) == 0) {
1211 outputNames.push_back(nameFileName);
1212 outputTypes["name"].push_back(nameFileName);
1215 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1216 m->openOutputFile(nameFileName, temp); temp.close();
1222 numFPrimers = primers.size();
1223 numRPrimers = revPrimer.size();
1224 numLinkers = linker.size();
1225 numSpacers = spacer.size();
1227 bool allBlank = true;
1228 for (int i = 0; i < barcodeNameVector.size(); i++) {
1229 if (barcodeNameVector[i] != "") {
1234 for (int i = 0; i < primerNameVector.size(); i++) {
1235 if (primerNameVector[i] != "") {
1242 m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine();
1250 catch(exception& e) {
1251 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1255 //***************************************************************************************************************
1257 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1260 if(qscores.getName() != ""){
1261 qscores.trimQScores(-1, keepFirst);
1263 sequence.trim(keepFirst);
1266 catch(exception& e) {
1267 m->errorOut(e, "keepFirstTrim", "countDiffs");
1273 //***************************************************************************************************************
1275 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1279 int length = sequence.getNumBases() - removeLast;
1282 if(qscores.getName() != ""){
1283 qscores.trimQScores(-1, length);
1285 sequence.trim(length);
1294 catch(exception& e) {
1295 m->errorOut(e, "removeLastTrim", "countDiffs");
1301 //***************************************************************************************************************
1303 bool TrimSeqsCommand::cullLength(Sequence& seq){
1306 int length = seq.getNumBases();
1307 bool success = 0; //guilty until proven innocent
1309 if(length >= minLength && maxLength == 0) { success = 1; }
1310 else if(length >= minLength && length <= maxLength) { success = 1; }
1311 else { success = 0; }
1316 catch(exception& e) {
1317 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1323 //***************************************************************************************************************
1325 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1327 int longHomoP = seq.getLongHomoPolymer();
1328 bool success = 0; //guilty until proven innocent
1330 if(longHomoP <= maxHomoP){ success = 1; }
1331 else { success = 0; }
1335 catch(exception& e) {
1336 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1342 //***************************************************************************************************************
1344 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1346 int numNs = seq.getAmbigBases();
1347 bool success = 0; //guilty until proven innocent
1349 if(numNs <= maxAmbig) { success = 1; }
1350 else { success = 0; }
1354 catch(exception& e) {
1355 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1360 //***************************************************************************************************************