*/
#include "screenseqscommand.h"
-
+#include "counttable.h"
//**********************************************************************************************************************
vector<string> ScreenSeqsCommand::setParameters(){
try {
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(palignreport);
CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(ptax);
string ScreenSeqsCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The screen.seqs command reads a fastafile and creates .....\n";
- helpString += "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, group, qfile, alignreport, taxonomy, optimize, criteria and processors.\n";
+ helpString += "The screen.seqs command reads a fastafile and screens sequences.\n";
+ helpString += "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, group, count, qfile, alignreport, taxonomy, optimize, criteria and processors.\n";
helpString += "The fasta parameter is required.\n";
helpString += "The alignreport and taxonomy parameters allow you to remove bad seqs from taxonomy and alignreport files.\n";
helpString += "The start parameter is used to set a position the \"good\" sequences must start by. The default is -1.\n";
if (type == "fasta") { outputFileName = "good" + m->getExtension(inputName); }
else if (type == "taxonomy") { outputFileName = "good" + m->getExtension(inputName); }
else if (type == "name") { outputFileName = "good" + m->getExtension(inputName); }
+ else if (type == "count") { outputFileName = "good" + m->getExtension(inputName); }
else if (type == "group") { outputFileName = "good" + m->getExtension(inputName); }
else if (type == "accnos") { outputFileName = "bad.accnos"; }
else if (type == "qfile") { outputFileName = "good" + m->getExtension(inputName); }
outputTypes["accnos"] = tempOutNames;
outputTypes["qfile"] = tempOutNames;
outputTypes["taxonomy"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "ScreenSeqsCommand", "ScreenSeqsCommand");
outputTypes["accnos"] = tempOutNames;
outputTypes["qfile"] = tempOutNames;
outputTypes["taxonomy"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
//check for required parameters
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { countfile = ""; abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((namefile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+ }
+
+ if ((groupfile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+ }
+
alignreport = validParameter.validFile(parameters, "alignreport", true);
if (alignreport == "not open") { abort = true; }
else if (alignreport == "not found") { alignreport = ""; }
temp = validParameter.validFile(parameters, "criteria", false); if (temp == "not found"){ temp = "90"; }
m->mothurConvert(temp, criteria);
- if (namefile == "") {
- vector<string> files; files.push_back(fastafile);
- parser.getNameFile(files);
- }
+ if (countfile == "") {
+ if (namefile == "") {
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files);
+ }
+ }
}
}
if (optimize.size() != 0) { //get summary is paralellized so we need to divideFile, no need to do this step twice so I moved it here
//use the namefile to optimize correctly
if (namefile != "") { nameMap = m->readNames(namefile); }
+ else if (countfile != "") {
+ CountTable ct;
+ ct.readTable(countfile);
+ nameMap = ct.getNameMap();
+ }
getSummary(positions);
}
else {
screenNameGroupFile(badSeqNames);
if (m->control_pressed) { m->mothurRemove(goodSeqFile); return 0; }
}else if(groupfile != "") { screenGroupFile(badSeqNames); } // this screens just the group
-
+ else if (countfile != "") { screenCountFile(badSeqNames); }
+
+
if (m->control_pressed) { m->mothurRemove(goodSeqFile); return 0; }
if(alignreport != "") { screenAlignReport(badSeqNames); }
if (itTypes != outputTypes.end()) {
if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
}
+
+ itTypes = outputTypes.find("count");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+ }
m->mothurOut("It took " + toString(time(NULL) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences.");
m->mothurOutEndLine();
exit(1);
}
}
+//***************************************************************************************************************
+int ScreenSeqsCommand::screenCountFile(set<string> badSeqNames){
+ try {
+ ifstream in;
+ m->openInputFile(countfile, in);
+ set<string>::iterator it;
+
+ string goodCountFile = outputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
+ outputNames.push_back(goodCountFile); outputTypes["count"].push_back(goodCountFile);
+ ofstream goodCountOut; m->openOutputFile(goodCountFile, goodCountOut);
+
+ string headers = m->getline(in); m->gobble(in);
+ goodCountOut << headers << endl;
+
+ string name, rest; int thisTotal;
+ while (!in.eof()) {
+ if (m->control_pressed) { goodCountOut.close(); in.close(); m->mothurRemove(goodCountFile); return 0; }
+
+ in >> name; m->gobble(in);
+ in >> thisTotal; m->gobble(in);
+ rest = m->getline(in); m->gobble(in);
+
+ it = badSeqNames.find(name);
+
+ if(it != badSeqNames.end()){
+ badSeqNames.erase(it);
+ }
+ else{
+ goodCountOut << name << '\t' << thisTotal << '\t' << rest << endl;
+ }
+ }
+
+ if (m->control_pressed) { goodCountOut.close(); in.close(); m->mothurRemove(goodCountFile); return 0; }
+
+ //we were unable to remove some of the bad sequences
+ if (badSeqNames.size() != 0) {
+ for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
+ m->mothurOut("Your groupfile does not include the sequence " + *it + " please correct.");
+ m->mothurOutEndLine();
+ }
+ }
+
+ in.close();
+ goodCountOut.close();
+
+ //check for groups that have been eliminated
+ CountTable ct;
+ if (ct.testGroups(goodCountFile)) {
+ ct.readTable(goodCountFile);
+ ct.printTable(goodCountFile);
+ }
+
+ if (m->control_pressed) { m->mothurRemove(goodCountFile); }
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ScreenSeqsCommand", "screenCountFile");
+ exit(1);
+ }
+}
//***************************************************************************************************************
int ScreenSeqsCommand::screenAlignReport(set<string> badSeqNames){