CommandParameter pstart("start", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pstart);
CommandParameter pend("end", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pend);
CommandParameter pnomatch("nomatch", "Multiple", "reject-keep", "reject", "", "", "",false,false); parameters.push_back(pnomatch);
- CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepprimer);
+ CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pkeepdots);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
string PcrSeqsCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The pcr.seqs command reads a fasta file ...\n";
-
+ helpString += "The pcr.seqs command reads a fasta file.\n";
+ helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, taxonomy, ecoli, start, end, nomatch, processors, keepprimer and keepdots.\n";
+ helpString += "The ecoli parameter is used to provide a fasta file containing a single reference sequence (e.g. for e. coli) this must be aligned. Mothur will trim to the start and end positions of the reference sequence.\n";
+ helpString += "The start parameter allows you to provide a starting position to trim to.\n";
+ helpString += "The end parameter allows you to provide a ending position to trim from.\n";
+ helpString += "The nomatch parameter allows you to decide what to do with sequences where the primer is not found. Default=reject, meaning remove from fasta file. if nomatch=true, then do nothing to sequence.\n";
+ helpString += "The processors parameter allows you to use multiple processors.\n";
+ helpString += "The keepprimer parameter allows you to keep the primer, default=false.\n";
+ helpString += "The keepdots parameter allows you to keep the leading and trailing .'s, default=true.\n";
helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Pcr.seqs .\n";
return helpString;
}
}
-
+//**********************************************************************************************************************
+//Returns the tag this command appends to the root name of an output file of the given type.
+//  type      - output type to look up; it must already have an entry in outputTypes.
+//  inputName - file name handed to m->getExtension() for taxonomy, group and name outputs (unused for other types).
+//Returns an empty string, after logging an error, when the type has no entry in outputTypes or no tag is defined for it.
+string PcrSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
+	try {
+		string tag = "";
+
+		//guard: only types with an entry in outputTypes are created by this command
+		if (outputTypes.find(type) == outputTypes.end()) {
+			m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+			return tag;
+		}
+
+		if (type == "fasta") { tag = "pcr.fasta"; }
+		else if (type == "accnos") { tag = "bad.accnos"; }
+		else if ((type == "taxonomy") || (type == "group") || (type == "name")) {
+			//these three share one rule: "pcr" followed by whatever m->getExtension() yields for inputName
+			tag = "pcr" + m->getExtension(inputName);
+		}
+		else {
+			//the type has an outputTypes entry but no tag rule here; raise control_pressed as well as logging
+			m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+			m->control_pressed = true;
+		}
+
+		return tag;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "PcrSeqsCommand", "getOutputFileNameTag");
+		exit(1);
+	}
+}
//**********************************************************************************************************************
PcrSeqsCommand::PcrSeqsCommand(){
}
- //if the user changes the output directory command factory will send this info to us in the output parameter
- outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
//check for required parameters
fastafile = validParameter.validFile(parameters, "fasta", true);
}else if (fastafile == "not open") { fastafile = ""; abort = true; }
else { m->setFastaFile(fastafile); }
-
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(fastafile); }
+
//check for optional parameter and set defaults
// ...at some point should added some additional type checking...
string temp;
temp = validParameter.validFile(parameters, "keepprimer", false); if (temp == "not found") { temp = "f"; }
keepprimer = m->isTrue(temp);
+ temp = validParameter.validFile(parameters, "keepdots", false); if (temp == "not found") { temp = "t"; }
+ keepdots = m->isTrue(temp);
+
temp = validParameter.validFile(parameters, "oligos", true);
if (temp == "not found"){ oligosfile = ""; }
else if(temp == "not open"){ oligosfile = ""; abort = true; }
if (taxfile == "not found"){ taxfile = ""; }
else if(taxfile == "not open"){ taxfile = ""; abort = true; }
else { m->setTaxonomyFile(taxfile); }
-
- temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
- m->mothurConvert(temp, pdiffs);
-
+
temp = validParameter.validFile(parameters, "start", false); if (temp == "not found") { temp = "-1"; }
m->mothurConvert(temp, start);
m->mothurOut("[ERROR]: You did not set any options. Please provide an oligos or ecoli file, or set start or end.\n"); abort = true;
}
- if ((oligosfile == "") && (ecolifile == "") && (start < 0) && (end != -1)) { m->mothurOut("[ERROR]: Invalid start value.\n"); abort = true; }
+ if ((oligosfile == "") && (ecolifile == "") && (start < 0) && (end == -1)) { m->mothurOut("[ERROR]: Invalid start value.\n"); abort = true; }
if ((ecolifile != "") && (start != -1) && (end != -1)) {
m->mothurOut("[ERROR]: You provided an ecoli file , but set the start or end parameters. Unsure what you intend. When you provide the ecoli file, mothur thinks you want to use the start and end of the sequence in the ecoli file.\n"); abort = true;
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
- string trimSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pcr.fasta";
+ string trimSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta");
outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
- string badSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pcr.scrap.fasta";
- outputNames.push_back(badSeqFile); outputTypes["fasta"].push_back(badSeqFile);
+ string badSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "scrap." + getOutputFileNameTag("fasta");
+
length = 0;
if(oligosfile != ""){ readOligos(); } if (m->control_pressed) { return 0; }
if (m->control_pressed) { return 0; }
- writeAccnos(badNames);
+ //don't write or keep if blank
+ if (badNames.size() != 0) { writeAccnos(badNames); }
+ if (m->isBlank(badSeqFile)) { m->mothurRemove(badSeqFile); }
+ else { outputNames.push_back(badSeqFile); outputTypes["fasta"].push_back(badSeqFile); }
+
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
if (namefile != "") { readName(badNames); }
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
// Allocate memory for thread data.
- pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, start, end, length, lines[i].start, lines[i].end);
+ pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, lines[i].start, lines[i].end);
pDataArray.push_back(tempPcr);
//default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
else{
//are you aligned
if (aligned) {
- if (!keepprimer) { currSeq.padToPos(mapAligned[primerEnd]); }
- else { currSeq.padToPos(mapAligned[primerStart]); }
+ if (!keepprimer) {
+ if (keepdots) { currSeq.filterToPos(mapAligned[primerEnd]); }
+ else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd])); }
+ }
+ else {
+ if (keepdots) { currSeq.filterToPos(mapAligned[primerStart]); }
+ else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart])); }
+ }
}else {
if (!keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); }
else { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); }
else{
//are you aligned
if (aligned) {
- if (!keepprimer) { currSeq.padFromPos(mapAligned[primerStart]); }
- else { currSeq.padFromPos(mapAligned[primerEnd]); }
+ if (!keepprimer) {
+ if (keepdots) { currSeq.filterFromPos(mapAligned[primerStart]); }
+ else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart])); }
+ }
+ else {
+ if (keepdots) { currSeq.filterFromPos(mapAligned[primerEnd]); }
+ else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd])); }
+ }
}
else {
if (!keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart)); }
else if (currSeq.getAligned().length() != length) {
m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); m->control_pressed = true; break;
}else {
- currSeq.padToPos(start);
- currSeq.padFromPos(end);
+ if (keepdots) {
+ currSeq.filterToPos(start);
+ currSeq.filterFromPos(end);
+ }else {
+ string seqString = currSeq.getAligned().substr(0, end);
+ seqString = seqString.substr(start);
+ currSeq.setAligned(seqString);
+ }
}
}else{ //using start and end to trim
//make sure the seqs are aligned
lengths.insert(currSeq.getAligned().length());
if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
else {
- if (start != -1) { currSeq.padToPos(start); }
if (end != -1) {
if (end > currSeq.getAligned().length()) { m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); m->control_pressed = true; break; }
else {
- currSeq.padFromPos(end);
+ if (keepdots) { currSeq.filterFromPos(end); }
+ else {
+ string seqString = currSeq.getAligned().substr(0, end);
+ currSeq.setAligned(seqString);
+ }
+ }
+ }
+ if (start != -1) {
+ if (keepdots) { currSeq.filterToPos(start); }
+ else {
+ string seqString = currSeq.getAligned().substr(start);
+ currSeq.setAligned(seqString);
}
}
}
}
-
+
+ //trimming removed all bases
+ if (currSeq.getUnaligned() == "") { goodSeq = false; }
+
if(goodSeq == 1) { currSeq.printSequence(goodFile); }
else {
badSeqNames.insert(currSeq.getName());
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "bad.accnos";
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("accnos");
outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName);
ofstream out;
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pcr" + m->getExtension(namefile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pcr" + m->getExtension(groupfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pcr" + m->getExtension(taxfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
ofstream out;
m->openOutputFile(outputFileName, out);