CommandParameter pstart("start", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pstart);
CommandParameter pend("end", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pend);
CommandParameter pnomatch("nomatch", "Multiple", "reject-keep", "reject", "", "", "",false,false); parameters.push_back(pnomatch);
- CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepprimer);
+ CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pkeepdots);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
string PcrSeqsCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The pcr.seqs command reads a fasta file ...\n";
-
+ helpString += "The pcr.seqs command reads a fasta file.\n";
+ helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, taxonomy, ecoli, start, end, nomatch, processors, keepprimer and keepdots.\n";
+ helpString += "The ecoli parameter is used to provide a fasta file containing a single reference sequence (e.g. for e. coli) this must be aligned. Mothur will trim to the start and end positions of the reference sequence.\n";
+ helpString += "The start parameter allows you to provide a starting position to trim to.\n";
+ helpString += "The end parameter allows you to provide a ending position to trim from.\n";
+ helpString += "The nomatch parameter allows you to decide what to do with sequences where the primer is not found. Default=reject, meaning remove from fasta file. if nomatch=true, then do nothing to sequence.\n";
+ helpString += "The processors parameter allows you to use multiple processors.\n";
+ helpString += "The keepprimer parameter allows you to keep the primer, default=false.\n";
+ helpString += "The keepdots parameter allows you to keep the leading and trailing .'s, default=true.\n";
helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Pcr.seqs .\n";
return helpString;
}
}
-
+//**********************************************************************************************************************
+string PcrSeqsCommand::getOutputFileNameTag(string type, string inputName=""){ // Returns the filename tag this command appends for an output file of the given type, or "" if the type is not recognized.
+ try { // type: key looked up in outputTypes; inputName: used only to obtain an extension for taxonomy/group/name tags (defaults to "").
+ string outputFileName = ""; // stays empty on both error paths below, so callers receive "" on failure
+ map<string, vector<string> >::iterator it;
+
+ // Only types present as keys in outputTypes are considered outputs of this command.
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } // logs only; control_pressed is NOT set on this path, unlike the final else below
+ else {
+ if (type == "fasta") { outputFileName = "pcr.fasta"; } // fixed tag
+ else if (type == "taxonomy") { outputFileName = "pcr" + m->getExtension(inputName); } // "pcr" followed by whatever m->getExtension(inputName) returns — presumably the extension with its leading dot; TODO confirm
+ else if (type == "group") { outputFileName = "pcr" + m->getExtension(inputName); } // same rule as taxonomy
+ else if (type == "name") { outputFileName = "pcr" + m->getExtension(inputName); } // same rule as taxonomy
+ else if (type == "accnos") { outputFileName = "bad.accnos"; } // fixed tag
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } // type is a key in outputTypes but has no tag defined here: log and set m->control_pressed
+ }
+ return outputFileName;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PcrSeqsCommand", "getOutputFileNameTag"); // report the exception with class and method name
+ exit(1); // terminates the process rather than propagating the exception
+ }
+}
//**********************************************************************************************************************
PcrSeqsCommand::PcrSeqsCommand(){
}
- //if the user changes the output directory command factory will send this info to us in the output parameter
- outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
//check for required parameters
fastafile = validParameter.validFile(parameters, "fasta", true);
}else if (fastafile == "not open") { fastafile = ""; abort = true; }
else { m->setFastaFile(fastafile); }
-
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(fastafile); }
+
//check for optional parameter and set defaults
// ...at some point should added some additional type checking...
string temp;
temp = validParameter.validFile(parameters, "keepprimer", false); if (temp == "not found") { temp = "f"; }
keepprimer = m->isTrue(temp);
+ temp = validParameter.validFile(parameters, "keepdots", false); if (temp == "not found") { temp = "t"; }
+ keepdots = m->isTrue(temp);
+
temp = validParameter.validFile(parameters, "oligos", true);
if (temp == "not found"){ oligosfile = ""; }
else if(temp == "not open"){ oligosfile = ""; abort = true; }
if (taxfile == "not found"){ taxfile = ""; }
else if(taxfile == "not open"){ taxfile = ""; abort = true; }
else { m->setTaxonomyFile(taxfile); }
-
- temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
- m->mothurConvert(temp, pdiffs);
-
+
temp = validParameter.validFile(parameters, "start", false); if (temp == "not found") { temp = "-1"; }
m->mothurConvert(temp, start);
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
- string trimSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pcr.fasta";
+ string trimSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta");
outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
- string badSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pcr.scrap.fasta";
- outputNames.push_back(badSeqFile); outputTypes["fasta"].push_back(badSeqFile);
+ string badSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "scrap." + getOutputFileNameTag("fasta");
+
length = 0;
if(oligosfile != ""){ readOligos(); } if (m->control_pressed) { return 0; }
if (m->control_pressed) { return 0; }
- writeAccnos(badNames);
+ //don't write or keep if blank
+ if (badNames.size() != 0) { writeAccnos(badNames); }
+ if (m->isBlank(badSeqFile)) { m->mothurRemove(badSeqFile); }
+ else { outputNames.push_back(badSeqFile); outputTypes["fasta"].push_back(badSeqFile); }
+
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
if (namefile != "") { readName(badNames); }
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
// Allocate memory for thread data.
- pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, start, end, length, lines[i].start, lines[i].end);
+ pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, lines[i].start, lines[i].end);
pDataArray.push_back(tempPcr);
//default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
else{
//are you aligned
if (aligned) {
- if (!keepprimer) { currSeq.filterToPos(mapAligned[primerEnd]); }
- else { currSeq.filterToPos(mapAligned[primerStart]); }
+ if (!keepprimer) {
+ if (keepdots) { currSeq.filterToPos(mapAligned[primerEnd]); }
+ else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd])); }
+ }
+ else {
+ if (keepdots) { currSeq.filterToPos(mapAligned[primerStart]); }
+ else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart])); }
+ }
}else {
if (!keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); }
else { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); }
else{
//are you aligned
if (aligned) {
- if (!keepprimer) { currSeq.filterFromPos(mapAligned[primerStart]); }
- else { currSeq.filterFromPos(mapAligned[primerEnd]); }
+ if (!keepprimer) {
+ if (keepdots) { currSeq.filterFromPos(mapAligned[primerStart]); }
+ else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart])); }
+ }
+ else {
+ if (keepdots) { currSeq.filterFromPos(mapAligned[primerEnd]); }
+ else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd])); }
+ }
}
else {
if (!keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart)); }
else if (currSeq.getAligned().length() != length) {
m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); m->control_pressed = true; break;
}else {
- currSeq.filterToPos(start);
- currSeq.filterFromPos(end);
+ if (keepdots) {
+ currSeq.filterToPos(start);
+ currSeq.filterFromPos(end);
+ }else {
+ string seqString = currSeq.getAligned().substr(0, end);
+ seqString = seqString.substr(start);
+ currSeq.setAligned(seqString);
+ }
}
}else{ //using start and end to trim
//make sure the seqs are aligned
lengths.insert(currSeq.getAligned().length());
if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
else {
- if (start != -1) { currSeq.filterToPos(start); }
if (end != -1) {
if (end > currSeq.getAligned().length()) { m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); m->control_pressed = true; break; }
else {
- currSeq.filterFromPos(end);
+ if (keepdots) { currSeq.filterFromPos(end); }
+ else {
+ string seqString = currSeq.getAligned().substr(0, end);
+ currSeq.setAligned(seqString);
+ }
+ }
+ }
+ if (start != -1) {
+ if (keepdots) { currSeq.filterToPos(start); }
+ else {
+ string seqString = currSeq.getAligned().substr(start);
+ currSeq.setAligned(seqString);
}
}
}
}
-
+
+ //trimming removed all bases
+ if (currSeq.getUnaligned() == "") { goodSeq = false; }
+
if(goodSeq == 1) { currSeq.printSequence(goodFile); }
else {
badSeqNames.insert(currSeq.getName());
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "bad.accnos";
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("accnos");
outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName);
ofstream out;
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pcr" + m->getExtension(namefile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pcr" + m->getExtension(groupfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pcr" + m->getExtension(taxfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
ofstream out;
m->openOutputFile(outputFileName, out);