try {
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(pfasta);
CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none",false,false); parameters.push_back(pshared);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "sharedGroup", "FNGLT",false,false); parameters.push_back(pgroup);
- CommandParameter pdesign("design", "InputTypes", "", "", "none", "sharedGroup", "FNGLT",false,false); parameters.push_back(pdesign);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "sharedGroup", "FNGLT",false,false); parameters.push_back(pgroup); CommandParameter pdesign("design", "InputTypes", "", "", "none", "sharedGroup", "FNGLT",false,false); parameters.push_back(pdesign);
CommandParameter plist("list", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(plist);
CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(ptaxonomy);
CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
string RemoveGroupsCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The remove.groups command removes sequences from a specfic group or set of groups from the following file types: fasta, name, group, list, taxonomy, design or sharedfile.\n";
+ helpString += "The remove.groups command removes sequences from a specfic group or set of groups from the following file types: fasta, name, group, count, list, taxonomy, design or sharedfile.\n";
helpString += "It outputs a file containing the sequences NOT in the those specified groups, or with a sharedfile eliminates the groups you selected.\n";
- helpString += "The remove.groups command parameters are accnos, fasta, name, group, list, taxonomy, shared, design and groups. The group parameter is required, unless you have a current group file or are using a sharedfile.\n";
+ helpString += "The remove.groups command parameters are accnos, fasta, name, group, list, taxonomy, shared, design and groups. The group or count parameter is required, unless you have a current group or count file or are using a sharedfile.\n";
helpString += "You must also provide an accnos containing the list of groups to remove or set the groups parameter to the groups you wish to remove.\n";
helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like removed. You can separate group names with dashes.\n";
helpString += "The remove.groups command should be in the following format: remove.groups(accnos=yourAccnos, fasta=yourFasta, group=yourGroupFile).\n";
else if (type == "taxonomy") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "name") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "group") { outputFileName = "pick" + m->getExtension(inputName); }
+ else if (type == "count") { outputFileName = "pick.count_table"; }
else if (type == "list") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "shared") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "design") { outputFileName = "pick" + m->getExtension(inputName); }
outputTypes["list"] = tempOutNames;
outputTypes["shared"] = tempOutNames;
outputTypes["design"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "RemoveGroupsCommand", "RemoveGroupsCommand");
outputTypes["list"] = tempOutNames;
outputTypes["shared"] = tempOutNames;
outputTypes["design"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
//if the user changes the output directory command factory will send this info to us in the output parameter
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["design"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a count file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
else if (sharedfile == "not found") { sharedfile = ""; }
else { m->setSharedFile(sharedfile); }
- groupfile = validParameter.validFile(parameters, "group", true);
- if (groupfile == "not open") { groupfile = ""; abort = true; }
- else if (groupfile == "not found") { groupfile = ""; }
- else { m->setGroupFile(groupfile); }
- if ((sharedfile == "") && (groupfile == "") && (designfile == "")) {
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { countfile = ""; abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((namefile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+ }
+
+ if ((groupfile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+ }
+
+
+ if ((sharedfile == "") && (groupfile == "") && (designfile == "") && (countfile == "")) {
//is there a current file available for any of these?
if ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")) {
//give priority to group, then shared
sharedfile = m->getSharedFile();
if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
else {
- m->mothurOut("You have no current groupfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
+ countfile = m->getCountTableFile();
+ if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("You have no current groupfile, countfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
+ }
}
}
}else {
designfile = m->getDesignFile();
if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
else {
- m->mothurOut("You have no current groupfile or sharedfile or designfile and one is required."); m->mothurOutEndLine(); abort = true;
+ countfile = m->getCountTableFile();
+ if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("You have no current groupfile, designfile, countfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
+ }
+
}
}
}
if ((accnosfile == "") && (Groups.size() == 0)) { m->mothurOut("You must provide an accnos file containing group names or specify groups using the groups parameter."); m->mothurOutEndLine(); abort = true; }
- if ((fastafile == "") && (namefile == "") && (groupfile == "") && (sharedfile == "") && (designfile == "") && (listfile == "") && (taxfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared, design or list."); m->mothurOutEndLine(); abort = true; }
- if ((groupfile == "") && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != ""))) { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group file."); m->mothurOutEndLine(); abort = true; }
-
- if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
- vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
- parser.getNameFile(files);
- }
-
+ if ((fastafile == "") && (namefile == "") && (countfile == "") && (groupfile == "") && (designfile == "") && (sharedfile == "") && (listfile == "") && (taxfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared, design, count or list."); m->mothurOutEndLine(); abort = true; }
+ if (((groupfile == "") && (countfile == "")) && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != ""))) { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group or count file."); m->mothurOutEndLine(); abort = true; }
+
+ if (countfile == "") {
+ if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+ vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+ parser.getNameFile(files);
+ }
+ }
}
}
fillNames();
delete groupMap;
- }
+ }else if (countfile != ""){
+ if ((fastafile != "") || (listfile != "") || (taxfile != "")) {
+ m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
+ }
+ CountTable ct;
+ ct.readTable(countfile);
+ if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, aborting.\n"); return 0; }
+
+ vector<string> gNamesOfGroups = ct.getNamesOfGroups();
+ SharedUtil util;
+ util.setGroups(Groups, gNamesOfGroups);
+ vector<string> namesOfSeqs = ct.getNamesOfSeqs();
+ sort(Groups.begin(), Groups.end());
+
+ for (int i = 0; i < namesOfSeqs.size(); i++) {
+ vector<string> thisSeqsGroups = ct.getGroups(namesOfSeqs[i]);
+ if (m->isSubset(Groups, thisSeqsGroups)) { //you only have seqs from these groups so remove you
+ names.insert(namesOfSeqs[i]);
+ }
+ }
+ }
+
if (m->control_pressed) { return 0; }
if (namefile != "") { readName(); }
if (fastafile != "") { readFasta(); }
if (groupfile != "") { readGroup(); }
+ if (countfile != "") { readCount(); }
if (listfile != "") { readList(); }
if (taxfile != "") { readTax(); }
if (sharedfile != "") { readShared(); }
if (itTypes != outputTypes.end()) {
if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setDesignFile(current); }
}
+
+ itTypes = outputTypes.find("count");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+ }
}
return 0;
}
}
//**********************************************************************************************************************
+// Writes a filtered copy of the count table in countfile with the user's selected groups removed.
+//
+// The first header column is the sequence name, the second its total, and every later column is a
+// group. Groups listed in Groups are dropped from the header and from every row; each row's total is
+// recomputed from the groups that remain. Sequences whose names are in `names` are left out entirely.
+//
+// The kept groups are written in sorted order. Each count is placed under its own group's header
+// regardless of the column order of the input file (previously the counts were emitted in input
+// column order while the header was sorted, which misaligned them for unsorted input headers).
+//
+// The output file name is recorded in outputTypes["count"] and outputNames.
+// Returns 0, including when the run is interrupted via m->control_pressed (the partial output file is
+// removed in that case). Exceptions are reported through m->errorOut and terminate the program.
+int RemoveGroupsCommand::readCount(){
+	try {
+		string thisOutputDir = outputDir;
+		if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
+		string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
+
+		ofstream out;
+		m->openOutputFile(outputFileName, out);
+
+		ifstream in;
+		m->openInputFile(countfile, in);
+
+		bool wroteSomething = false;
+		int removedCount = 0;
+
+		string headers = m->getline(in); m->gobble(in);
+		vector<string> columnHeaders = m->splitWhiteSpace(headers);
+
+		//group names in the order their columns appear in the input file
+		vector<string> groups;
+		for (size_t i = 2; i < columnHeaders.size(); i++) {  groups.push_back(columnHeaders[i]);  }
+
+		//groups that survive, sorted to keep consistent with how we store the groups in groupmap
+		vector<string> groupsToKeep;
+		for (size_t i = 0; i < groups.size(); i++) {
+			if (!m->inUsersGroups(groups[i], Groups)) { groupsToKeep.push_back(groups[i]); }
+		}
+		sort(groupsToKeep.begin(), groupsToKeep.end());
+
+		//write the header and remember which output column each kept group owns
+		map<string, int> outputColumn;
+		out << "Representative_Sequence\ttotal\t";
+		for (size_t i = 0; i < groupsToKeep.size(); i++) {
+			out << groupsToKeep[i] << '\t';
+			outputColumn[groupsToKeep[i]] = (int)i;
+		}
+		out << endl;
+
+		//for every input column: the output column its count belongs in, or -1 if the group is removed
+		vector<int> destination(groups.size(), -1);
+		for (size_t i = 0; i < groups.size(); i++) {
+			map<string, int>::iterator itColumn = outputColumn.find(groups[i]);
+			if (itColumn != outputColumn.end()) { destination[i] = itColumn->second; }
+		}
+
+		string name; int oldTotal = 0;
+		while (!in.eof()) {
+
+			if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
+
+			in >> name; m->gobble(in); in >> oldTotal; m->gobble(in);
+			if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + toString(oldTotal) + "\n"); }
+
+			if (names.count(name) == 0) {
+				//read every group column, keeping only the wanted ones, each in its header's position
+				vector<int> selectedCounts(groupsToKeep.size(), 0);
+				int thisTotal = 0;
+				for (size_t i = 0; i < groups.size(); i++) {
+					int temp = 0;
+					in >> temp; m->gobble(in);
+					if (destination[i] != -1) { //we want this group
+						selectedCounts[destination[i]] = temp; thisTotal += temp;
+					}
+				}
+
+				out << name << '\t' << thisTotal << '\t';
+				for (size_t i = 0; i < selectedCounts.size(); i++) {  out << selectedCounts[i] << '\t'; }
+				out << endl;
+
+				wroteSomething = true;
+				removedCount += (oldTotal - thisTotal);
+			}else {  m->getline(in); removedCount += oldTotal;  } //whole sequence is being removed, skip the rest of its row
+
+			m->gobble(in);
+		}
+		in.close();
+		out.close();
+
+		if (wroteSomething == false) {  m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine();  }
+		outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
+
+		m->mothurOut("Removed " + toString(removedCount) + " sequences from your count file."); m->mothurOutEndLine();
+
+		return 0;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "RemoveGroupsCommand", "readCount");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
int RemoveGroupsCommand::readDesign(){
try {
string thisOutputDir = outputDir;