//**********************************************************************************************************************
vector<string> RemoveGroupsCommand::setParameters(){
try {
- CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(pfasta);
- CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none",false,false); parameters.push_back(pshared);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "sharedGroup", "FNGLT",false,false); parameters.push_back(pgroup);
- CommandParameter pdesign("design", "InputTypes", "", "", "none", "sharedGroup", "FNGLT",false,false); parameters.push_back(pdesign);
- CommandParameter plist("list", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(plist);
- CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(ptaxonomy);
- CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
- CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
- CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
- CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+ CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "FNGLT","fasta",false,false,true); parameters.push_back(pfasta);
+ CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none","shared",false,false,true); parameters.push_back(pshared);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false,true); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "sharedGroup", "FNGLT","group",false,false,true); parameters.push_back(pgroup);
+ CommandParameter pdesign("design", "InputTypes", "", "", "none", "sharedGroup", "FNGLT","design",false,false); parameters.push_back(pdesign);
+ CommandParameter plist("list", "InputTypes", "", "", "none", "none", "FNGLT","list",false,false,true); parameters.push_back(plist);
+ CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FNGLT","taxonomy",false,false,true); parameters.push_back(ptaxonomy);
+ CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos);
+ CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
vector<string> myArray;
for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
string RemoveGroupsCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The remove.groups command removes sequences from a specfic group or set of groups from the following file types: fasta, name, group, list, taxonomy, design or sharedfile.\n";
+ helpString += "The remove.groups command removes sequences from a specfic group or set of groups from the following file types: fasta, name, group, count, list, taxonomy, design or sharedfile.\n";
helpString += "It outputs a file containing the sequences NOT in the those specified groups, or with a sharedfile eliminates the groups you selected.\n";
- helpString += "The remove.groups command parameters are accnos, fasta, name, group, list, taxonomy, shared, design and groups. The group parameter is required, unless you have a current group file or are using a sharedfile.\n";
+ helpString += "The remove.groups command parameters are accnos, fasta, name, group, list, taxonomy, shared, design and groups. The group or count parameter is required, unless you have a current group or count file or are using a sharedfile.\n";
helpString += "You must also provide an accnos containing the list of groups to remove or set the groups parameter to the groups you wish to remove.\n";
helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like removed. You can separate group names with dashes.\n";
helpString += "The remove.groups command should be in the following format: remove.groups(accnos=yourAccnos, fasta=yourFasta, group=yourGroupFile).\n";
}
}
//**********************************************************************************************************************
-string RemoveGroupsCommand::getOutputFileNameTag(string type, string inputName=""){
- try {
- string outputFileName = "";
- map<string, vector<string> >::iterator it;
+string RemoveGroupsCommand::getOutputPattern(string type) { //returns the comma-separated output filename pattern for the given output type, or "" when the type is not recognized
+ try {
+ string pattern = "";
- //is this a type this command creates
- it = outputTypes.find(type);
- if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
- else {
- if (type == "fasta") { outputFileName = "pick" + m->getExtension(inputName); }
- else if (type == "taxonomy") { outputFileName = "pick" + m->getExtension(inputName); }
- else if (type == "name") { outputFileName = "pick" + m->getExtension(inputName); }
- else if (type == "group") { outputFileName = "pick" + m->getExtension(inputName); }
- else if (type == "list") { outputFileName = "pick" + m->getExtension(inputName); }
- else if (type == "shared") { outputFileName = "pick" + m->getExtension(inputName); }
- else if (type == "design") { outputFileName = "pick" + m->getExtension(inputName); }
- else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
- }
- return outputFileName;
- }
- catch(exception& e) {
- m->errorOut(e, "RemoveGroupsCommand", "getOutputFileNameTag");
- exit(1);
- }
+ if (type == "fasta") { pattern = "[filename],pick,[extension]"; } //every type except shared uses this same three-piece pattern
+ else if (type == "taxonomy") { pattern = "[filename],pick,[extension]"; }
+ else if (type == "name") { pattern = "[filename],pick,[extension]"; }
+ else if (type == "group") { pattern = "[filename],pick,[extension]"; }
+ else if (type == "count") { pattern = "[filename],pick,[extension]"; }
+ else if (type == "list") { pattern = "[filename],pick,[extension]"; }
+ else if (type == "shared") { pattern = "[filename],[tag],pick,[extension]"; } //shared is the only pattern with a [tag] piece, placed between the filename and "pick"
+ else if (type == "design") { pattern = "[filename],pick,[extension]"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } //unrecognized type: log the error and set control_pressed; pattern stays ""
+
+ return pattern;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "RemoveGroupsCommand", "getOutputPattern"); //report the exception under this function's name
+ exit(1); //then terminate the process
+ }
}
//**********************************************************************************************************************
RemoveGroupsCommand::RemoveGroupsCommand(){
outputTypes["list"] = tempOutNames;
outputTypes["shared"] = tempOutNames;
outputTypes["design"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "RemoveGroupsCommand", "RemoveGroupsCommand");
outputTypes["list"] = tempOutNames;
outputTypes["shared"] = tempOutNames;
outputTypes["design"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
//if the user changes the output directory command factory will send this info to us in the output parameter
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["design"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
else if (sharedfile == "not found") { sharedfile = ""; }
else { m->setSharedFile(sharedfile); }
- groupfile = validParameter.validFile(parameters, "group", true);
- if (groupfile == "not open") { groupfile = ""; abort = true; }
- else if (groupfile == "not found") { groupfile = ""; }
- else { m->setGroupFile(groupfile); }
- if ((sharedfile == "") && (groupfile == "") && (designfile == "")) {
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { countfile = ""; abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((namefile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+ }
+
+ if ((groupfile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+ }
+
+
+ if ((sharedfile == "") && (groupfile == "") && (designfile == "") && (countfile == "")) {
//is there are current file available for any of these?
if ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")) {
//give priority to group, then shared
sharedfile = m->getSharedFile();
if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
else {
- m->mothurOut("You have no current groupfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
+ countfile = m->getCountTableFile();
+ if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("You have no current groupfile, countfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
+ }
}
}
}else {
designfile = m->getDesignFile();
if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
else {
- m->mothurOut("You have no current groupfile or sharedfile or designfile and one is required."); m->mothurOutEndLine(); abort = true;
+ countfile = m->getCountTableFile();
+ if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("You have no current groupfile, designfile, countfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
+ }
+
}
}
}
if ((accnosfile == "") && (Groups.size() == 0)) { m->mothurOut("You must provide an accnos file containing group names or specify groups using the groups parameter."); m->mothurOutEndLine(); abort = true; }
- if ((fastafile == "") && (namefile == "") && (groupfile == "") && (sharedfile == "") && (designfile == "") && (listfile == "") && (taxfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared, design or list."); m->mothurOutEndLine(); abort = true; }
- if ((groupfile == "") && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != ""))) { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group file."); m->mothurOutEndLine(); abort = true; }
-
- if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
- vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
- parser.getNameFile(files);
- }
-
+ if ((fastafile == "") && (namefile == "") && (countfile == "") && (groupfile == "") && (designfile == "") && (sharedfile == "") && (listfile == "") && (taxfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared, design, count or list."); m->mothurOutEndLine(); abort = true; }
+ if (((groupfile == "") && (countfile == "")) && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != ""))) { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group or count file."); m->mothurOutEndLine(); abort = true; }
+
+ if (countfile == "") {
+ if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+ vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+ parser.getNameFile(files);
+ }
+ }
}
}
//make sure groups are valid
//takes care of user setting groupNames that are invalid or setting groups=all
- SharedUtil* util = new SharedUtil();
vector<string> namesGroups = groupMap->getNamesOfGroups();
- util->setGroups(Groups, namesGroups);
- delete util;
-
+ vector<string> checkedGroups;
+ for (int i = 0; i < Groups.size(); i++) {
+ if (m->inUsersGroups(Groups[i], namesGroups)) { checkedGroups.push_back(Groups[i]); }
+ else { m->mothurOut("[WARNING]: " + Groups[i] + " is not a valid group in your groupfile, ignoring.\n"); }
+ }
+
+ if (checkedGroups.size() == 0) { m->mothurOut("[ERROR]: no valid groups, aborting.\n"); delete groupMap; return 0; }
+ else {
+ Groups = checkedGroups;
+ m->setGroups(Groups);
+ }
+
//fill names with names of sequences that are from the groups we want to remove
fillNames();
delete groupMap;
- }
+ }else if (countfile != ""){
+ if ((fastafile != "") || (listfile != "") || (taxfile != "")) {
+ m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
+ }
+ CountTable ct;
+ ct.readTable(countfile, true);
+ if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, aborting.\n"); return 0; }
+
+ vector<string> gNamesOfGroups = ct.getNamesOfGroups();
+ SharedUtil util;
+ util.setGroups(Groups, gNamesOfGroups);
+ vector<string> namesOfSeqs = ct.getNamesOfSeqs();
+ sort(Groups.begin(), Groups.end());
+
+ for (int i = 0; i < namesOfSeqs.size(); i++) {
+ vector<string> thisSeqsGroups = ct.getGroups(namesOfSeqs[i]);
+ if (m->isSubset(Groups, thisSeqsGroups)) { //you only have seqs from these groups so remove you
+ names.insert(namesOfSeqs[i]);
+ }
+ }
+ }
+
if (m->control_pressed) { return 0; }
if (namefile != "") { readName(); }
if (fastafile != "") { readFasta(); }
if (groupfile != "") { readGroup(); }
+ if (countfile != "") { readCount(); }
if (listfile != "") { readList(); }
if (taxfile != "") { readTax(); }
if (sharedfile != "") { readShared(); }
if (itTypes != outputTypes.end()) {
if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setDesignFile(current); }
}
+
+ itTypes = outputTypes.find("count");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+ }
}
return 0;
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);
+ map<string, string> variables;
+ variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
+ variables["[extension]"] = m->getExtension(fastafile);
+ string outputFileName = getOutputFileName("fasta", variables);
ofstream out;
m->openOutputFile(outputFileName, out);
//that way we can take advantage of the reads in inputdata and sharedRabundVector
InputData* tempInput = new InputData(sharedfile, "sharedfile");
vector<SharedRAbundVector*> lookup = tempInput->getSharedRAbundVectors();
+
+ map<string, string> variables;
+ variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile));
+ variables["[extension]"] = m->getExtension(sharedfile);
//save m->Groups
vector<string> allGroupsNames = m->getAllGroups();
while(lookup[0] != NULL) {
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + lookup[0]->getLabel() + "." + getOutputFileNameTag("shared", sharedfile);
+ variables["[tag]"] = lookup[0]->getLabel();
+ string outputFileName = getOutputFileName("shared", variables);
ofstream out;
m->openOutputFile(outputFileName, out);
outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("list", listfile);
+ map<string, string> variables;
+ variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
+ variables["[extension]"] = m->getExtension(listfile);
+ string outputFileName = getOutputFileName("list", variables);
+
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
+ map<string, string> variables;
+ variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
+ variables["[extension]"] = m->getExtension(namefile);
+ string outputFileName = getOutputFileName("name", variables);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
+ map<string, string> variables;
+ variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
+ variables["[extension]"] = m->getExtension(groupfile);
+ string outputFileName = getOutputFileName("group", variables);
ofstream out;
m->openOutputFile(outputFileName, out);
}
}
//**********************************************************************************************************************
+//Writes a filtered copy of countfile: the columns of the groups listed in Groups are dropped, every
+//sequence whose name is in names is skipped, and each remaining row's total is recomputed from the
+//columns that are kept. The group columns of the output are written in sorted group-name order, for
+//the header and for every row, regardless of the column order of the input file.
+//The output file name is appended to outputTypes["count"] and outputNames.
+//Returns 0, including when m->control_pressed interrupts the read (the streams are closed and
+//m->mothurRemove is called on the partially written output file in that case).
+int RemoveGroupsCommand::readCount(){
+    try {
+        string thisOutputDir = outputDir;
+        if (outputDir == "") { thisOutputDir += m->hasPath(countfile); }
+        map<string, string> variables;
+        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
+        variables["[extension]"] = m->getExtension(countfile);
+        string outputFileName = getOutputFileName("count", variables);
+
+        ofstream out;
+        m->openOutputFile(outputFileName, out);
+
+        ifstream in;
+        m->openInputFile(countfile, in);
+
+        bool wroteSomething = false;
+        int removedCount = 0;
+
+        string headers = m->getline(in); m->gobble(in);
+        vector<string> columnHeaders = m->splitWhiteSpace(headers);
+
+        //columns 0 and 1 are the sequence name and its total, the group columns start at index 2
+        vector<string> groups;
+        map<int, string> originalGroupIndexes; //position among the file's group columns -> group name
+        map<string, int> GroupIndexes;         //group name -> position in the sorted group list
+        set<int> indexOfGroupsChosen;          //sorted positions of the groups that stay in the output
+        for (int i = 2; i < columnHeaders.size(); i++) { groups.push_back(columnHeaders[i]); originalGroupIndexes[i-2] = columnHeaders[i]; }
+        //sort groups to keep consistent with how we store the groups in groupmap
+        sort(groups.begin(), groups.end());
+        for (int i = 0; i < groups.size(); i++) { GroupIndexes[groups[i]] = i; }
+
+        //groups is sorted, so filtering it leaves groupsToKeep sorted as well
+        vector<string> groupsToKeep;
+        for (int i = 0; i < groups.size(); i++) {
+            if (!m->inUsersGroups(groups[i], Groups)) { groupsToKeep.push_back(groups[i]); }
+        }
+
+        out << "Representative_Sequence\ttotal\t";
+        for (int i = 0; i < groupsToKeep.size(); i++) { out << groupsToKeep[i] << '\t'; indexOfGroupsChosen.insert(GroupIndexes[groupsToKeep[i]]); }
+        out << endl;
+
+        string name; int oldTotal = 0;
+        while (!in.eof()) {
+
+            if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
+
+            in >> name; m->gobble(in); in >> oldTotal; m->gobble(in);
+            if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + toString(oldTotal) + "\n"); }
+
+            if (names.count(name) == 0) {
+                //the counts arrive in the file's column order; key them by sorted group position so that
+                //they are written in the same order as the header even when the input columns are unsorted
+                map<int, int> selectedCounts; int thisTotal = 0; int temp = 0;
+                for (int i = 0; i < groups.size(); i++) {
+                    int thisIndex = GroupIndexes[originalGroupIndexes[i]];
+                    in >> temp; m->gobble(in);
+                    if (indexOfGroupsChosen.count(thisIndex) != 0) { //we want this group
+                        selectedCounts[thisIndex] = temp; thisTotal += temp;
+                    }
+                }
+
+                out << name << '\t' << thisTotal << '\t';
+                for (map<int, int>::iterator itCount = selectedCounts.begin(); itCount != selectedCounts.end(); itCount++) { out << itCount->second << '\t'; }
+                out << endl;
+
+                wroteSomething = true;
+                removedCount += (oldTotal - thisTotal);
+            }else { m->getline(in); removedCount += oldTotal; } //sequence is being removed: discard the rest of its row
+
+            m->gobble(in);
+        }
+        in.close();
+        out.close();
+
+        //no row was written, so every sequence in the file was in names
+        if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine(); }
+        outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
+
+        m->mothurOut("Removed " + toString(removedCount) + " sequences from your count file."); m->mothurOutEndLine();
+
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "RemoveGroupsCommand", "readCount");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
int RemoveGroupsCommand::readDesign(){
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(designfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(designfile)) + getOutputFileNameTag("design", designfile);
+ map<string, string> variables;
+ variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(designfile));
+ variables["[extension]"] = m->getExtension(designfile);
+ string outputFileName = getOutputFileName("design", variables);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
+ map<string, string> variables;
+ variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile));
+ variables["[extension]"] = m->getExtension(taxfile);
+ string outputFileName = getOutputFileName("taxonomy", variables);
ofstream out;
m->openOutputFile(outputFileName, out);