try {
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
CommandParameter poligos("oligos", "InputTypes", "", "", "ecolioligos", "none", "none",false,false); parameters.push_back(poligos);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(ptax);
CommandParameter pecoli("ecoli", "InputTypes", "", "", "ecolioligos", "none", "none",false,false); parameters.push_back(pecoli);
CommandParameter pstart("start", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pstart);
CommandParameter pend("end", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pend);
CommandParameter pnomatch("nomatch", "Multiple", "reject-keep", "reject", "", "", "",false,false); parameters.push_back(pnomatch);
- CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepprimer);
CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pkeepdots);
string PcrSeqsCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The pcr.seqs command reads a fasta file ...\n";
-
+ helpString += "The pcr.seqs command reads a fasta file.\n";
+ helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, count, taxonomy, ecoli, start, end, nomatch, processors, keepprimer and keepdots.\n";
+ helpString += "The ecoli parameter is used to provide a fasta file containing a single reference sequence (e.g. for e. coli) this must be aligned. Mothur will trim to the start and end positions of the reference sequence.\n";
+ helpString += "The start parameter allows you to provide a starting position to trim to.\n";
+ helpString += "The end parameter allows you to provide a ending position to trim from.\n";
+ helpString += "The nomatch parameter allows you to decide what to do with sequences where the primer is not found. Default=reject, meaning remove from fasta file. if nomatch=true, then do nothing to sequence.\n";
+ helpString += "The processors parameter allows you to use multiple processors.\n";
+ helpString += "The keepprimer parameter allows you to keep the primer, default=false.\n";
+ helpString += "The keepdots parameter allows you to keep the leading and trailing .'s, default=true.\n";
helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Pcr.seqs .\n";
return helpString;
}
}
-
+//**********************************************************************************************************************
+// Returns the file name tag (the part appended to the input's root name) for an output of the given type.
+//
+// type      - output type label; must be a key of outputTypes.
+// inputName - name of the matching input file; only its extension is used, and only for the
+//             taxonomy, group, name and count types.
+//
+// An empty string is returned when the type is not one this command creates, or when no tag is
+// defined for it; the second case also sets m->control_pressed.
+string PcrSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
+    try {
+        //bail out early for a type this command never produces
+        if (outputTypes.count(type) == 0) {
+            m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n");
+            return "";
+        }
+
+        //types with a fixed tag
+        if (type == "fasta")  { return "pcr.fasta";  }
+        if (type == "accnos") { return "bad.accnos"; }
+
+        //types whose tag keeps the extension of the input file
+        const bool keepsInputExtension = (type == "taxonomy") || (type == "group") || (type == "name") || (type == "count");
+        if (keepsInputExtension) { return "pcr" + m->getExtension(inputName); }
+
+        //registered in outputTypes, but nothing above knows how to name it
+        m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PcrSeqsCommand", "getOutputFileNameTag");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
PcrSeqsCommand::PcrSeqsCommand(){
outputTypes["taxonomy"] = tempOutNames;
outputTypes["group"] = tempOutNames;
outputTypes["name"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
outputTypes["accnos"] = tempOutNames;
}
catch(exception& e) {
outputTypes["group"] = tempOutNames;
outputTypes["name"] = tempOutNames;
outputTypes["accnos"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["group"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a count file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
- //if the user changes the output directory command factory will send this info to us in the output parameter
- outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
//check for required parameters
fastafile = validParameter.validFile(parameters, "fasta", true);
}else if (fastafile == "not open") { fastafile = ""; abort = true; }
else { m->setFastaFile(fastafile); }
-
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(fastafile); }
+
//check for optional parameter and set defaults
// ...at some point we should add some additional type checking...
string temp;
else if(groupfile == "not open"){ groupfile = ""; abort = true; }
else { m->setGroupFile(groupfile); }
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { countfile = ""; abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((namefile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+ }
+
+ if ((groupfile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+ }
+
taxfile = validParameter.validFile(parameters, "taxonomy", true);
if (taxfile == "not found"){ taxfile = ""; }
else if(taxfile == "not open"){ taxfile = ""; abort = true; }
else { m->setTaxonomyFile(taxfile); }
-
- temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
- m->mothurConvert(temp, pdiffs);
-
+
temp = validParameter.validFile(parameters, "start", false); if (temp == "not found") { temp = "-1"; }
m->mothurConvert(temp, start);
}
//check to make sure you didn't forget the name file by mistake
- if (namefile == "") {
- vector<string> files; files.push_back(fastafile);
- parser.getNameFile(files);
- }
+ if (countfile == "") {
+ if (namefile == "") {
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files);
+ }
+ }
}
}
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
- string trimSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pcr.fasta";
+ string trimSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta");
outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
- string badSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pcr.scrap.fasta";
- outputNames.push_back(badSeqFile); outputTypes["fasta"].push_back(badSeqFile);
+ string badSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "scrap." + getOutputFileNameTag("fasta");
+
length = 0;
if(oligosfile != ""){ readOligos(); } if (m->control_pressed) { return 0; }
//don't write or keep if blank
if (badNames.size() != 0) { writeAccnos(badNames); }
if (m->isBlank(badSeqFile)) { m->mothurRemove(badSeqFile); }
+ else { outputNames.push_back(badSeqFile); outputTypes["fasta"].push_back(badSeqFile); }
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
if (namefile != "") { readName(badNames); }
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
if (taxfile != "") { readTax(badNames); }
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
-
+ if (countfile != "") { readCount(badNames); }
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
}
+ itTypes = outputTypes.find("count");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+ }
+
m->mothurOut("It took " + toString(time(NULL) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences.");
m->mothurOutEndLine();
}
}
}
-
+
+ //trimming removed all bases
+ if (currSeq.getUnaligned() == "") { goodSeq = false; }
+
if(goodSeq == 1) { currSeq.printSequence(goodFile); }
else {
badSeqNames.insert(currSeq.getName());
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "bad.accnos";
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("accnos");
outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName);
ofstream out;
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pcr" + m->getExtension(namefile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pcr" + m->getExtension(groupfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pcr" + m->getExtension(taxfile);
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
ofstream out;
m->openOutputFile(outputFileName, out);
exit(1);
}
}
+//***************************************************************************************************************
+// Writes a filtered copy of the count file, leaving out every sequence named in badSeqNames.
+//
+// badSeqNames - names of the sequences to drop from the count file.
+// Returns 0 in every case; problems are reported through m->mothurOut and m->control_pressed.
+//
+// The output name is the count file's root name plus the "count" output tag; it is registered in
+// outputNames and outputTypes["count"]. Whenever m->control_pressed is found set, the output file
+// is removed and the function returns immediately.
+int PcrSeqsCommand::readCount(set<string> badSeqNames){
+    try {
+        ifstream in;
+        m->openInputFile(countfile, in);
+
+        //same output directory rule as the name, group and taxonomy readers:
+        //use the input file's own path when no output directory is set
+        string thisOutputDir = outputDir;
+        if (outputDir == "") { thisOutputDir += m->hasPath(countfile); }
+        string goodCountFile = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
+        outputNames.push_back(goodCountFile); outputTypes["count"].push_back(goodCountFile);
+        ofstream goodCountOut; m->openOutputFile(goodCountFile, goodCountOut);
+
+        //the first line is copied through unchanged
+        string headers = m->getline(in); m->gobble(in);
+        goodCountOut << headers << '\n';
+
+        string name, rest;
+        int thisTotal = 0;
+        int removedCount = 0;
+        bool wroteSomething = false;
+        while (!in.eof()) {
+
+            if (m->control_pressed) { goodCountOut.close(); in.close(); m->mothurRemove(goodCountFile); return 0; }
+
+            in >> name; m->gobble(in);
+            in >> thisTotal;
+
+            //a failed extraction sets failbit without ever reaching eof, so without this check
+            //a malformed line would keep the loop spinning forever
+            if (in.fail()) {
+                m->mothurOut("[ERROR]: unable to read a sequence name and total from " + countfile + ", quitting."); m->mothurOutEndLine();
+                m->control_pressed = true;
+                break;
+            }
+
+            //skip blanks and tabs only, never the line break
+            //NOTE(review): presumably m->gobble also consumes newlines, which would make a row
+            //without group columns swallow the following row - confirm against MothurOut
+            while ((in.peek() == ' ') || (in.peek() == '\t')) { in.get(); }
+            rest = m->getline(in); m->gobble(in);
+
+            if (badSeqNames.count(name) != 0) { removedCount += thisTotal; }
+            else {
+                wroteSomething = true;
+                goodCountOut << name << '\t' << thisTotal;
+                if (rest != "") { goodCountOut << '\t' << rest; }
+                goodCountOut << '\n';
+            }
+        }
+        in.close();
+        goodCountOut.close();
+
+        //nothing below makes sense on a deleted file, so stop once it has been removed
+        if (m->control_pressed) { m->mothurRemove(goodCountFile); return 0; }
+
+        if (wroteSomething == false) { m->mothurOut("Your count file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
+
+        //check for groups that have been eliminated; when testGroups reports true the table is
+        //read back in and printed over the same file
+        CountTable ct;
+        if (ct.testGroups(goodCountFile)) {
+            ct.readTable(goodCountFile);
+            ct.printTable(goodCountFile);
+        }
+
+        if (m->control_pressed) { m->mothurRemove(goodCountFile); return 0; }
+
+        m->mothurOut("Removed " + toString(removedCount) + " sequences from your count file."); m->mothurOutEndLine();
+
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PcrSeqsCommand", "readCount");
+        exit(1);
+    }
+}
/**************************************************************************************/