//***************************************************************************************************************
-TrimSeqsCommand::TrimSeqsCommand(string option){
+// Sets up the trim.seqs command: checks every supplied parameter name against the
+// valid list, prepends inputdir to input files given without a path, chooses the
+// output directory, and reads the optional settings. Any failure sets abort.
+TrimSeqsCommand::TrimSeqsCommand(string option) {
try {
abort = false;
else {
//valid paramters for this command
- string AlignArray[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", "qthreshold", "qaverage", "allfiles"};
+ string AlignArray[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile",
+ "qthreshold", "qaverage", "allfiles", "qtrim", "outputdir","inputdir"};
vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
map<string,string> parameters = parser.getParameters();
ValidParameters validParameter;
+ map<string,string>::iterator it;
//check to make sure all parameters are valid for command
- for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
+ for (it = parameters.begin(); it != parameters.end(); it++) {
if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
}
+ //if the user changes the input directory command factory will send this info to us in the inputdir parameter
+ string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ if (inputDir == "not found"){ inputDir = ""; }
+ else {
+ string path;
+ it = parameters.find("fasta");
+ //user has given a fasta file
+ if(it != parameters.end()){
+ path = hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["fasta"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("oligos");
+ //user has given an oligos file
+ if(it != parameters.end()){
+ path = hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["oligos"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("qfile");
+ //user has given a quality file
+ if(it != parameters.end()){
+ path = hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["qfile"] = inputDir + it->second; }
+ }
+ }
+
+
//check for required parameters
fastaFile = validParameter.validFile(parameters, "fasta", true);
- if (fastaFile == "not found") { mothurOut("fasta is a required parameter for the screen.seqs command."); mothurOutEndLine(); abort = true; }
+ if (fastaFile == "not found") { m->mothurOut("fasta is a required parameter for the trim.seqs command."); m->mothurOutEndLine(); abort = true; }
else if (fastaFile == "not open") { abort = true; }
-
+
+ //if the user changes the output directory command factory will send this info to us in the outputdir parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
+ outputDir = "";
+ outputDir += hasPath(fastaFile); //if user entered a file with a path then preserve it
+ }
//check for optional parameter and set defaults
// ...at some point should added some additional type checking...
temp = validParameter.validFile(parameters, "qfile", true);
if (temp == "not found") { qFileName = ""; }
- else if(temp == "not open") { abort = 0; }
+ else if(temp == "not open") { abort = true; }
else { qFileName = temp; }
temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; }
convert(temp, qThreshold);
+
+ //qtrim defaults to F; execute() scraps a sequence shortened by the quality check unless qtrim is set
+ temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "F"; }
+ qtrim = isTrue(temp);
temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; }
convert(temp, qAverage);
allFiles = isTrue(temp);
if(allFiles && oligoFile == ""){
- mothurOut("You selected allfiles, but didn't enter an oligos file. Ignoring the allfiles request."); mothurOutEndLine();
+ m->mothurOut("You selected allfiles, but didn't enter an oligos file. Ignoring the allfiles request."); m->mothurOutEndLine();
}
if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
- mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); mothurOutEndLine();
+ m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
qAverage=0;
qThreshold=0;
}
if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){
- mothurOut("You didn't set any options... quiting command."); mothurOutEndLine();
+ m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
abort = true;
}
}
}
catch(exception& e) {
- errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
+ m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
exit(1);
}
}
void TrimSeqsCommand::help(){
try {
- mothurOut("The trim.seqs command reads a fastaFile and creates .....\n");
- mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength and maxlength.\n");
- mothurOut("The fasta parameter is required.\n");
- mothurOut("The flip parameter .... The default is 0.\n");
- mothurOut("The oligos parameter .... The default is "".\n");
- mothurOut("The maxambig parameter .... The default is -1.\n");
- mothurOut("The maxhomop parameter .... The default is 0.\n");
- mothurOut("The minlength parameter .... The default is 0.\n");
- mothurOut("The maxlength parameter .... The default is 0.\n");
- mothurOut("The trim.seqs command should be in the following format: \n");
- mothurOut("trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n");
- mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n");
- mothurOut("Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n");
- mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
+ //print the usage text for trim.seqs: parameter list, defaults, call format and an example
+ m->mothurOut("The trim.seqs command reads a fastaFile and creates .....\n");
+ m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, qtrim and allfiles.\n");
+ m->mothurOut("The fasta parameter is required.\n");
+ m->mothurOut("The flip parameter .... The default is 0.\n");
+ //the quotes must be escaped; unescaped "" ends the literal and the default would print as nothing
+ m->mothurOut("The oligos parameter .... The default is \"\".\n");
+ m->mothurOut("The maxambig parameter .... The default is -1.\n");
+ m->mothurOut("The maxhomop parameter .... The default is 0.\n");
+ m->mothurOut("The minlength parameter .... The default is 0.\n");
+ m->mothurOut("The maxlength parameter .... The default is 0.\n");
+ m->mothurOut("The qfile parameter .....\n");
+ m->mothurOut("The qthreshold parameter .... The default is 0.\n");
+ m->mothurOut("The qaverage parameter .... The default is 0.\n");
+ m->mothurOut("The allfiles parameter .... The default is F.\n");
+ m->mothurOut("The qtrim parameter .... The default is F.\n");
+ m->mothurOut("The trim.seqs command should be in the following format: \n");
+ m->mothurOut("trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n");
+ m->mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n");
+ m->mothurOut("Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n");
+ m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
+ m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
}
catch(exception& e) {
- errorOut(e, "TrimSeqsCommand", "help");
+ m->errorOut(e, "TrimSeqsCommand", "help");
exit(1);
}
}
try{
if (abort == true) { return 0; }
-
+
+ vector<string> outputNames;
+
+ numFPrimers = 0; //this needs to be initialized
+ numRPrimers = 0;
+
ifstream inFASTA;
openInputFile(fastaFile, inFASTA);
ofstream outFASTA;
- string trimSeqFile = getRootName(fastaFile) + "trim.fasta";
+ string trimSeqFile = outputDir + getRootName(getSimpleName(fastaFile)) + "trim.fasta";
openOutputFile(trimSeqFile, outFASTA);
+ outputNames.push_back(trimSeqFile);
ofstream outGroups;
vector<ofstream*> fastaFileNames;
if(oligoFile != ""){
- string groupFile = getRootName(fastaFile) + "groups";
+ string groupFile = outputDir + getRootName(getSimpleName(fastaFile)) + "groups";
openOutputFile(groupFile, outGroups);
+ outputNames.push_back(groupFile);
getOligos(fastaFileNames);
}
ofstream scrapFASTA;
- string scrapSeqFile = getRootName(fastaFile) + "scrap.fasta";
+ string scrapSeqFile = outputDir + getRootName(getSimpleName(fastaFile)) + "scrap.fasta";
openOutputFile(scrapSeqFile, scrapFASTA);
+ outputNames.push_back(scrapSeqFile);
ifstream qFile;
if(qFileName != "") { openInputFile(qFileName, qFile); }
bool success;
-
+
while(!inFASTA.eof()){
Sequence currSeq(inFASTA);
+
string origSeq = currSeq.getUnaligned();
- int group;
- string trashCode = "";
-
- if(qFileName != ""){
- if(qThreshold != 0) { success = stripQualThreshold(currSeq, qFile); }
- else if(qAverage != 0) { success = cullQualAverage(currSeq, qFile); }
- if(!success) { trashCode += 'q'; }
- }
- if(barcodes.size() != 0){
-
- success = stripBarcode(currSeq, group);
- if(!success){ trashCode += 'b'; }
- }
- if(numFPrimers != 0){
- success = stripForward(currSeq);
- if(!success){ trashCode += 'f'; }
- }
- if(numRPrimers != 0){
- success = stripReverse(currSeq);
- if(!success){ trashCode += 'r'; }
- }
- if(minLength > 0 || maxLength > 0){
- success = cullLength(currSeq);
- if ((currSeq.getUnaligned().length() > 300) && (success)) { cout << "too long " << currSeq.getUnaligned().length() << endl; }
- if(!success){ trashCode += 'l'; }
- }
- if(maxHomoP > 0){
- success = cullHomoP(currSeq);
- if(!success){ trashCode += 'h'; }
- }
- if(maxAmbig != -1){
- success = cullAmbigs(currSeq);
- if(!success){ trashCode += 'n'; }
- }
-
- if(flip){ currSeq.reverseComplement(); } // should go last
+ if (origSeq != "") {
+ int group;
+ string trashCode = "";
+
+ if(qFileName != ""){
+ if(qThreshold != 0) { success = stripQualThreshold(currSeq, qFile); }
+ else if(qAverage != 0) { success = cullQualAverage(currSeq, qFile); }
+ if ((!qtrim) && (origSeq.length() != currSeq.getUnaligned().length())) {
+ success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap
+ }
+ if(!success) { trashCode += 'q'; }
+ }
- if(trashCode.length() == 0){
- currSeq.setAligned(currSeq.getUnaligned()); //this is because of a modification we made to the sequence class to fix a bug. all seqs have an aligned version, which is the version that gets printed.
- currSeq.printSequence(outFASTA);
if(barcodes.size() != 0){
- outGroups << currSeq.getName() << '\t' << groupVector[group] << endl;
+ success = stripBarcode(currSeq, group);
+ if(!success){ trashCode += 'b'; }
+ }
+
+ if(numFPrimers != 0){
+ success = stripForward(currSeq);
+ if(!success){ trashCode += 'f'; }
+ }
- if(allFiles){
- currSeq.printSequence(*fastaFileNames[group]);
+ if(numRPrimers != 0){
+ success = stripReverse(currSeq);
+ if(!success){ trashCode += 'r'; }
+ }
+
+ if(minLength > 0 || maxLength > 0){
+ success = cullLength(currSeq);
+ if(!success){ trashCode += 'l'; }
+ }
+ if(maxHomoP > 0){
+ success = cullHomoP(currSeq);
+ if(!success){ trashCode += 'h'; }
+ }
+ if(maxAmbig != -1){
+ success = cullAmbigs(currSeq);
+ if(!success){ trashCode += 'n'; }
+ }
+
+ if(flip){ currSeq.reverseComplement(); } // should go last
+
+ if(trashCode.length() == 0){
+ currSeq.setAligned(currSeq.getUnaligned()); //this is because of a modification we made to the sequence class to fix a bug. all seqs have an aligned version, which is the version that gets printed.
+ currSeq.printSequence(outFASTA);
+ if(barcodes.size() != 0){
+ outGroups << currSeq.getName() << '\t' << groupVector[group] << endl;
+
+ if(allFiles){
+ currSeq.printSequence(*fastaFileNames[group]);
+ }
}
}
- }
- else{
- currSeq.setName(currSeq.getName() + '|' + trashCode);
- currSeq.setUnaligned(origSeq);
- currSeq.printSequence(scrapFASTA);
+ else{
+ currSeq.setName(currSeq.getName() + '|' + trashCode);
+ currSeq.setUnaligned(origSeq);
+ currSeq.printSequence(scrapFASTA);
+ }
}
gobble(inFASTA);
}
string seqName;
openInputFile(getRootName(fastaFile) + groupVector[i] + ".fasta", inFASTA);
ofstream outGroups;
- openOutputFile(getRootName(fastaFile) + groupVector[i] + ".groups", outGroups);
+ openOutputFile(outputDir + getRootName(getSimpleName(fastaFile)) + groupVector[i] + ".groups", outGroups);
+ outputNames.push_back(outputDir + getRootName(getSimpleName(fastaFile)) + groupVector[i] + ".groups");
while(!inFASTA.eof()){
if(inFASTA.get() == '>'){
inFASTA.close();
}
-
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+
return 0;
}
catch(exception& e) {
- errorOut(e, "TrimSeqsCommand", "execute");
+ m->errorOut(e, "TrimSeqsCommand", "execute");
exit(1);
}
}
forPrimer.push_back(oligo);
}
else if(type == "reverse"){
- revPrimer.push_back(oligo);
+ Sequence oligoRC("reverse", oligo);
+ oligoRC.reverseComplement();
+ revPrimer.push_back(oligoRC.getUnaligned());
}
else if(type == "barcode"){
inOligos >> group;
groupVector.push_back(group);
if(allFiles){
- outFASTAVec.push_back(new ofstream((getRootName(fastaFile) + group + ".fasta").c_str(), ios::ate));
+ outFASTAVec.push_back(new ofstream((outputDir + getRootName(getSimpleName(fastaFile)) + group + ".fasta").c_str(), ios::ate));
}
}
}
}
catch(exception& e) {
- errorOut(e, "TrimSeqsCommand", "getOligos");
+ m->errorOut(e, "TrimSeqsCommand", "getOligos");
exit(1);
}
}
catch(exception& e) {
- errorOut(e, "TrimSeqsCommand", "stripBarcode");
+ m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
exit(1);
}
}
catch(exception& e) {
- errorOut(e, "TrimSeqsCommand", "stripForward");
+ m->errorOut(e, "TrimSeqsCommand", "stripForward");
exit(1);
}
}
}
if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
- seq.setUnaligned(rawSequence.substr(rawSequence.length()-oligo.length()));
+ seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
success = 1;
break;
}
}
catch(exception& e) {
- errorOut(e, "TrimSeqsCommand", "stripReverse");
+ m->errorOut(e, "TrimSeqsCommand", "stripReverse");
exit(1);
}
}
}
catch(exception& e) {
- errorOut(e, "TrimSeqsCommand", "cullLength");
+ m->errorOut(e, "TrimSeqsCommand", "cullLength");
exit(1);
}
return success;
}
catch(exception& e) {
- errorOut(e, "TrimSeqsCommand", "cullHomoP");
+ m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
exit(1);
}
return success;
}
catch(exception& e) {
- errorOut(e, "TrimSeqsCommand", "cullAmbigs");
+ m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
exit(1);
}
return success;
}
catch(exception& e) {
- errorOut(e, "TrimSeqsCommand", "compareDNASeq");
+ m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
exit(1);
}
string name;
qFile >> name;
- if (name.length() != 0) { if(name.substr(1) != seq.getName()) { mothurOut("sequence name mismatch btwn fasta and qual file"); mothurOutEndLine(); } }
+ if (name.length() != 0) { if(name.substr(1) != seq.getName()) { m->mothurOut("sequence name mismatch btwn fasta and qual file"); m->mothurOutEndLine(); } }
while (!qFile.eof()) { char c = qFile.get(); if (c == 10 || c == 13){ break; } }
int score;
return 1;
}
catch(exception& e) {
- errorOut(e, "TrimSeqsCommand", "stripQualThreshold");
+ m->errorOut(e, "TrimSeqsCommand", "stripQualThreshold");
exit(1);
}
}
string name;
qFile >> name;
- if (name[0] == '>') { if(name.substr(1) != seq.getName()) { mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); mothurOutEndLine(); } }
+ if (name[0] == '>') { if(name.substr(1) != seq.getName()) { m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); } }
while (!qFile.eof()) { char c = qFile.get(); if (c == 10 || c == 13){ break; } }
return success;
}
catch(exception& e) {
- errorOut(e, "TrimSeqsCommand", "cullQualAverage");
+ m->errorOut(e, "TrimSeqsCommand", "cullQualAverage");
exit(1);
}
}