*/
#include "splitabundcommand.h"
+#include "sharedutilities.h"
//**********************************************************************************************************************
vector<string> SplitAbundCommand::setParameters(){
try {
- CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
- CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
- CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
- CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
- CommandParameter pcutoff("cutoff", "Number", "", "0", "", "", "",false,true); parameters.push_back(pcutoff);
- CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
- CommandParameter paccnos("accnos", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(paccnos);
- CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
- CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+ CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false); parameters.push_back(pgroup);
+ CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false,true); parameters.push_back(plist);
+ CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+ CommandParameter pcutoff("cutoff", "Number", "", "0", "", "", "","",false,true); parameters.push_back(pcutoff);
+ CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
+ CommandParameter paccnos("accnos", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(paccnos);
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
vector<string> myArray;
for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
try {
string helpString = "";
helpString += "The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n";
- helpString += "The split.abund command parameters are fasta, list, name, cutoff, group, label, groups, cutoff and accnos.\n";
- helpString += "The fasta and a list or name parameter are required, and you must provide a cutoff value.\n";
+ helpString += "The split.abund command parameters are fasta, list, name, count, cutoff, group, label, groups, cutoff and accnos.\n";
+ helpString += "The fasta and a list or name or count parameter are required, and you must provide a cutoff value.\n";
helpString += "The cutoff parameter is used to qualify what is abundant and rare.\n";
helpString += "The group parameter allows you to parse a group file into rare and abundant groups.\n";
helpString += "The label parameter is used to read specific labels in your listfile you want to use.\n";
}
}
+//**********************************************************************************************************************
+/**
+ * Looks up the output filename pattern for one of this command's output types.
+ *
+ * Recognized types are fasta, list, name, count, group and accnos. Each pattern
+ * lists two '-' separated variants (presumably resolved by getOutputFileName
+ * from the variables the caller supplies -- confirm there).
+ *
+ * For any other type an error is written with mothurOut, control_pressed is
+ * set, and an empty pattern is returned.
+ */
+string SplitAbundCommand::getOutputPattern(string type) {
+    try {
+        if (type == "fasta")  { return "[filename],[tag],[tag2],fasta-[filename],[tag],[group],[tag2],fasta"; }
+        if (type == "list")   { return "[filename],[tag],[tag2],list-[filename],[group],[tag],[tag2],list"; }
+        if (type == "name")   { return "[filename],[tag],names-[filename],[group],[tag],names"; }
+        if (type == "count")  { return "[filename],[tag],[tag2],count_table-[filename],[tag],count_table"; }
+        if (type == "group")  { return "[filename],[tag],[tag2],groups-[filename],[tag],[group],[tag2],groups"; }
+        if (type == "accnos") { return "[filename],[tag],[tag2],accnos-[filename],[tag],[group],[tag2],accnos"; }
+
+        //unknown output type: report it and ask the framework to stop
+        m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SplitAbundCommand", "getOutputPattern");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
SplitAbundCommand::SplitAbundCommand(){
try {
vector<string> tempOutNames;
outputTypes["list"] = tempOutNames;
outputTypes["name"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
outputTypes["accnos"] = tempOutNames;
outputTypes["group"] = tempOutNames;
outputTypes["fasta"] = tempOutNames;
outputTypes["name"] = tempOutNames;
outputTypes["accnos"] = tempOutNames;
outputTypes["group"] = tempOutNames;
- outputTypes["fasta"] = tempOutNames;
+ outputTypes["fasta"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
if (path == "") { parameters["name"] = inputDir + it->second; }
}
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
if (groupfile == "not open") { groupfile = ""; abort = true; }
else if (groupfile == "not found") { groupfile = ""; }
else {
- groupMap = new GroupMap(groupfile);
-
- int error = groupMap->readMap();
+ int error = groupMap.readMap(groupfile);
if (error == 1) { abort = true; }
m->setGroupFile(groupfile);
}
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { countfile = ""; abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else {
+ m->setCountTableFile(countfile);
+ ct.readTable(countfile, true);
+ }
+
+ if ((namefile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+ }
+
+ if ((groupfile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+ }
+
groups = validParameter.validFile(parameters, "groups", false);
if (groups == "not found") { groups = ""; }
- else if (groups == "all") {
- if (groupfile != "") { Groups = groupMap->getNamesOfGroups(); }
- else { m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; }
- }else {
- m->splitAtDash(groups, Groups);
- }
+ else { m->splitAtDash(groups, Groups); }
- if ((groupfile == "") && (groups != "")) { m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; Groups.clear(); }
+ if (((groupfile == "") && (countfile == ""))&& (groups != "")) { m->mothurOut("You cannot select groups without a valid group or count file, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; Groups.clear(); }
+ if (countfile != "") {
+ if (!ct.hasGroupInfo()) { m->mothurOut("You cannot pick groups without group info in your count file; I will disregard your groups selection."); m->mothurOutEndLine(); groups = ""; Groups.clear(); }
+ }
+
//do you have all files needed
- if ((listfile == "") && (namefile == "")) {
+ if ((listfile == "") && (namefile == "") && (countfile == "")) {
namefile = m->getNameFile();
if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
else {
listfile = m->getListFile();
if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
- else { m->mothurOut("You have no current list or namefile and the list or name parameter is required."); m->mothurOutEndLine(); abort = true; }
+ else {
+ countfile = m->getCountTableFile();
+ if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+ else { m->mothurOut("You have no current list, count or namefile and one is required."); m->mothurOutEndLine(); abort = true; }
+ }
}
}
-
+
//check for optional parameter and set defaults
// ...at some point should added some additional type checking...
label = validParameter.validFile(parameters, "label", false);
accnos = m->isTrue(temp);
temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0"; }
- convert(temp, cutoff);
+ m->mothurConvert(temp, cutoff);
if (cutoff == 0) { m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
-
}
}
}
}
//**********************************************************************************************************************
-SplitAbundCommand::~SplitAbundCommand(){
- if (groupfile != "") { delete groupMap; }
-}
+//nothing to release here: this revision calls groupMap.readMap(groupfile) on a member object rather than allocating a GroupMap with new
+SplitAbundCommand::~SplitAbundCommand(){}
//**********************************************************************************************************************
int SplitAbundCommand::execute(){
try {
if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+ if (Groups.size() != 0) {
+ vector<string> allGroups;
+ if (countfile != "") { allGroups = ct.getNamesOfGroups(); }
+ else { allGroups = groupMap.getNamesOfGroups(); }
+ SharedUtil util;
+ util.setGroups(Groups, allGroups);
+ }
if (listfile != "") { //you are using a listfile to determine abundance
if (outputDir == "") { outputDir = m->hasPath(listfile); }
set<string> processedLabels;
set<string> userLabels = labels;
- input = new InputData(listfile, "list");
- list = input->getListVector();
+ InputData input(listfile, "list");
+ ListVector* list = input.getListVector();
string lastLabel = list->getLabel();
//do you have a namefile or do we need to similate one?
if (namefile != "") { readNamesFile(); }
else { createNameMap(list); }
- if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+ if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
- if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+ if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
if(allLines == 1 || labels.count(list->getLabel()) == 1){
string saveLabel = list->getLabel();
delete list;
- list = input->getListVector(lastLabel); //get new list vector to process
+ list = input.getListVector(lastLabel); //get new list vector to process
m->mothurOut(list->getLabel()); m->mothurOutEndLine();
splitList(list);
lastLabel = list->getLabel();
delete list;
- list = input->getListVector(); //get new list vector to process
+ list = input.getListVector(); //get new list vector to process
}
- if (m->control_pressed) { delete input; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
//output error messages about any remaining user labels
set<string>::iterator it;
}
- if (m->control_pressed) { delete input; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
//run last label if you need to
if (needToRun == true) {
if (list != NULL) { delete list; }
- list = input->getListVector(lastLabel); //get new list vector to process
+ list = input.getListVector(lastLabel); //get new list vector to process
m->mothurOut(list->getLabel()); m->mothurOutEndLine();
splitList(list);
delete list;
}
- delete input;
-
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
- }else { //you are using the namefile to determine abundance
+ }else if (namefile != "") { //you are using the namefile to determine abundance
if (outputDir == "") { outputDir = m->hasPath(namefile); }
splitNames();
if (groupfile != "") { parseGroup(tag); }
if (accnos) { writeAccnos(tag); }
if (fastafile != "") { parseFasta(tag); }
- }
+ }else {
+ //split by countfile
+ string tag = "";
+ splitCount();
+
+ if (accnos) { writeAccnos(tag); }
+ if (fastafile != "") { parseFasta(tag); }
+ }
//set fasta file as new current fastafile
string current = "";
if (itTypes != outputTypes.end()) {
if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
}
+
+ itTypes = outputTypes.find("count");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+ }
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
abundNames.clear();
//get rareNames and abundNames
+ int numRareBins = 0;
for (int i = 0; i < thisList->getNumBins(); i++) {
if (m->control_pressed) { return 0; }
vector<string> names;
m->splitAtComma(bin, names); //parses bin into individual sequence names
int size = names.size();
-
+
+ //if countfile is not blank we assume the list file is unique, otherwise we assume it includes all seqs
+ if (countfile != "") {
+ size = 0;
+ for (int j = 0; j < names.size(); j++) { size += ct.getNumSeqs(names[j]); }
+ }
+
if (size <= cutoff) {
+ numRareBins++;
for (int j = 0; j < names.size(); j++) { rareNames.insert(names[j]); }
}else{
for (int j = 0; j < names.size(); j++) { abundNames.insert(names[j]); }
}//end for
- string tag = thisList->getLabel() + ".";
-
- writeList(thisList, tag);
-
+ string tag = thisList->getLabel();
+
+ writeList(thisList, tag, numRareBins);
+
if (groupfile != "") { parseGroup(tag); }
if (accnos) { writeAccnos(tag); }
if (fastafile != "") { parseFasta(tag); }
-
+ if (countfile != "") { parseCount(tag); }
+
return 0;
}
}
}
/**********************************************************************************************************************/
-int SplitAbundCommand::writeList(ListVector* thisList, string tag) {
+int SplitAbundCommand::writeList(ListVector* thisList, string tag, int numRareBins) {
try {
map<string, ofstream*> filehandles;
if (Groups.size() == 0) {
- SAbundVector* sabund = new SAbundVector();
- *sabund = thisList->getSAbundVector();
-
- //find out how many bins are rare and how many are abundant so you can process the list vector one bin at a time
- // and don't have to store the bins until you are done with the whole vector, this save alot of space.
- int numRareBins = 0;
- for (int i = 0; i <= sabund->getMaxRank(); i++) {
- if (i > cutoff) { break; }
- numRareBins += sabund->get(i);
- }
int numAbundBins = thisList->getNumBins() - numRareBins;
- delete sabund;
ofstream aout;
ofstream rout;
- string rare = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "rare.list";
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
+ variables["[tag]"] = tag;
+ variables["[tag2]"] = "rare";
+ string rare = getOutputFileName("list",variables);
m->openOutputFile(rare, rout);
outputNames.push_back(rare); outputTypes["list"].push_back(rare);
- string abund = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "abund.list";
+ variables["[tag2]"] = "abund";
+ string abund = getOutputFileName("list",variables);
m->openOutputFile(abund, aout);
outputNames.push_back(abund); outputTypes["list"].push_back(abund);
for (int i = 0; i < thisList->getNumBins(); i++) {
if (m->control_pressed) { break; }
- string bin = list->get(i);
-
- int size = m->getNumNames(bin);
+ string bin = thisList->get(i);
+ vector<string> names;
+ m->splitAtComma(bin, names);
+
+ int size = names.size();
+ if (countfile != "") {
+ size = 0;
+ for (int j = 0; j < names.size(); j++) { size += ct.getNumSeqs(names[j]); }
+ }
if (size <= cutoff) { rout << bin << '\t'; }
else { aout << bin << '\t'; }
temp2 = new ofstream;
filehandles[Groups[i]+".abund"] = temp2;
- m->openOutputFile(fileroot + Groups[i] + tag + ".rare.list", *(filehandles[Groups[i]+".rare"]));
- m->openOutputFile(fileroot + Groups[i] + tag + ".abund.list", *(filehandles[Groups[i]+".abund"]));
- outputNames.push_back(fileroot + Groups[i] + tag + ".rare.list"); outputTypes["list"].push_back(fileroot + Groups[i] + tag + ".rare.list");
- outputNames.push_back(fileroot + Groups[i] + tag + ".abund.list"); outputTypes["list"].push_back(fileroot + Groups[i] + tag + ".abund.list");
+ map<string, string> variables;
+ variables["[filename]"] = fileroot;
+ variables["[tag]"] = tag;
+ variables["[tag2]"] = "rare";
+ variables["[group]"] = Groups[i];
+ string rareGroupFileName = getOutputFileName("list",variables);
+ variables["[tag2]"] = "abund";
+ string abundGroupFileName = getOutputFileName("list",variables);
+ m->openOutputFile(rareGroupFileName, *(filehandles[Groups[i]+".rare"]));
+ m->openOutputFile(abundGroupFileName, *(filehandles[Groups[i]+".abund"]));
+ outputNames.push_back(rareGroupFileName); outputTypes["list"].push_back(rareGroupFileName);
+ outputNames.push_back(abundGroupFileName); outputTypes["list"].push_back(abundGroupFileName);
}
map<string, string> groupVector;
if (m->control_pressed) { break; }
map<string, string> groupBins;
- string bin = list->get(i);
+ string bin = thisList->get(i);
vector<string> names;
m->splitAtComma(bin, names); //parses bin into individual sequence names
rareAbund = ".abund";
}
- string group = groupMap->getGroup(names[j]);
-
- if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
- itGroup = groupBins.find(group+rareAbund);
- if(itGroup == groupBins.end()) {
- groupBins[group+rareAbund] = names[j]; //add first name
- groupNumBins[group+rareAbund]++;
- }else{ //add another name
- groupBins[group+rareAbund] += "," + names[j];
- }
- }else if(group == "not found") {
- m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
- }
+ if (countfile == "") {
+ string group = groupMap.getGroup(names[j]);
+
+ if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
+ itGroup = groupBins.find(group+rareAbund);
+ if(itGroup == groupBins.end()) {
+ groupBins[group+rareAbund] = names[j]; //add first name
+ groupNumBins[group+rareAbund]++;
+ }else{ //add another name
+ groupBins[group+rareAbund] += "," + names[j];
+ }
+ }else if(group == "not found") {
+ m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
+ }
+ }else {
+ vector<string> thisSeqsGroups = ct.getGroups(names[j]);
+ for (int k = 0; k < thisSeqsGroups.size(); k++) {
+ if (m->inUsersGroups(thisSeqsGroups[k], Groups)) { //only add if this is in a group we want
+ itGroup = groupBins.find(thisSeqsGroups[k]+rareAbund);
+ if(itGroup == groupBins.end()) {
+ groupBins[thisSeqsGroups[k]+rareAbund] = names[j]; //add first name
+ groupNumBins[thisSeqsGroups[k]+rareAbund]++;
+ }else{ //add another name
+ groupBins[thisSeqsGroups[k]+rareAbund] += "," + names[j];
+ }
+ }
+ }
+ }
}
}
}
/**********************************************************************************************************************/
+/**
+ * Classifies every sequence in the count table as rare or abundant and writes
+ * the split count files.
+ *
+ * A sequence whose ct.getNumSeqs() value is <= cutoff goes into rareNames,
+ * otherwise into abundNames. Each sequence is also entered in nameMap, mapped
+ * to itself. Returns 0, including when control_pressed interrupts the scan
+ * (in which case parseCount is not called).
+ */
+int SplitAbundCommand::splitCount() { //countfile
+    try {
+        abundNames.clear();
+        rareNames.clear();
+
+        const vector<string> seqNames = ct.getNamesOfSeqs();
+        for (vector<string>::const_iterator itSeq = seqNames.begin(); itSeq != seqNames.end(); ++itSeq) {
+
+            if (m->control_pressed) { return 0; }
+
+            const string& seqName = *itSeq;
+            const int abundance = ct.getNumSeqs(seqName);
+            nameMap[seqName] = seqName;
+
+            //pick the destination set once, then insert
+            set<string>& bucket = (abundance <= cutoff) ? rareNames : abundNames;
+            bucket.insert(seqName);
+        }
+
+        //write out split count files
+        parseCount("");
+
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SplitAbundCommand", "splitCount");
+        exit(1);
+    }
+}
+/**********************************************************************************************************************/
int SplitAbundCommand::splitNames() { //namefile
try {
}
}
/**********************************************************************************************************************/
+//Writes to filename a count table holding only the sequences listed in seqNames, copied from source.
+//When source has group info the per-group counts are copied, and any group whose total in the
+//subset is 0 is removed before printing; otherwise only each sequence's total count is copied.
+static void writeCountSubset(CountTable& source, const set<string>& seqNames, string filename) {
+    CountTable subset;
+    const bool hasGroups = source.hasGroupInfo();
+
+    if (hasGroups) {
+        vector<string> groupNames = source.getNamesOfGroups();
+        for (int i = 0; i < groupNames.size(); i++) { subset.addGroup(groupNames[i]); }
+    }
+
+    for (set<string>::const_iterator itName = seqNames.begin(); itName != seqNames.end(); ++itName) {
+        if (hasGroups) {
+            vector<int> groupCounts = source.getGroupCounts(*itName);
+            subset.push_back(*itName, groupCounts);
+        }else {
+            int total = source.getNumSeqs(*itName);
+            subset.push_back(*itName, total);
+        }
+    }
+
+    if (subset.hasGroupInfo()) {
+        //iterate over a copy of the group names, since groups are removed from subset inside the loop
+        vector<string> groupNames = subset.getNamesOfGroups();
+        for (int i = 0; i < groupNames.size(); i++) {
+            if (subset.getGroupCount(groupNames[i]) == 0) { subset.removeGroup(groupNames[i]); }
+        }
+    }
+
+    subset.printTable(filename);
+}
+/**********************************************************************************************************************/
+/**
+ * Splits the count table (ct) into rare and abundant count files, using the
+ * rareNames / abundNames sets filled in by the caller.
+ *
+ * With no Groups selected, one rare and one abund file are written, named from
+ * the "count" pattern with [tag] = tag and [tag2] = "rare" / "abund". A file is
+ * written, and recorded in outputNames / outputTypes["count"], only when its
+ * name set is non-empty, so a file that was never created is not reported.
+ *
+ * With Groups selected, a rare and an abund table are built for every group in
+ * Groups and each one is printed, named with [tag] = "<group>.rare" or
+ * "<group>.abund".
+ *
+ * @param tag  label text placed in the ungrouped output names (may be "")
+ * @return 0
+ */
+int SplitAbundCommand::parseCount(string tag) { //countfile
+    try {
+
+        if (Groups.size() == 0) {
+            map<string, string> variables;
+            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
+            variables["[tag]"] = tag;
+
+            if (!rareNames.empty()) {
+                variables["[tag2]"] = "rare";
+                string rare = getOutputFileName("count",variables);
+                outputNames.push_back(rare); outputTypes["count"].push_back(rare);
+                writeCountSubset(ct, rareNames, rare);
+            }
+
+            if (!abundNames.empty()) {
+                variables["[tag2]"] = "abund";
+                string abund = getOutputFileName("count",variables);
+                outputNames.push_back(abund); outputTypes["count"].push_back(abund);
+                writeCountSubset(ct, abundNames, abund);
+            }
+
+        }else{ //parse the count table by abundance and group
+            map<string, CountTable*> countTableMap;
+
+            //one rare and one abund table per selected group, keyed "<group>.rare" / "<group>.abund"
+            for (int i = 0; i < Groups.size(); i++) {
+                CountTable* rareCt = new CountTable();
+                rareCt->addGroup(Groups[i]);
+                countTableMap[Groups[i]+".rare"] = rareCt;
+                CountTable* abundCt = new CountTable();
+                abundCt->addGroup(Groups[i]);
+                countTableMap[Groups[i]+".abund"] = abundCt;
+            }
+
+            vector<string> allNames = ct.getNamesOfSeqs();
+            for (int i = 0; i < allNames.size(); i++) {
+                //anything not in rareNames is treated as abundant
+                string rareAbund = (rareNames.count(allNames[i]) != 0) ? ".rare" : ".abund";
+
+                vector<string> thisSeqsGroups = ct.getGroups(allNames[i]);
+                for (int j = 0; j < thisSeqsGroups.size(); j++) {
+                    if (m->inUsersGroups(thisSeqsGroups[j], Groups)) { //only add if this is in a group we want
+                        int num = ct.getGroupCount(allNames[i], thisSeqsGroups[j]);
+                        vector<int> nums; nums.push_back(num);
+                        countTableMap[thisSeqsGroups[j]+rareAbund]->push_back(allNames[i], nums);
+                    }
+                }
+            }
+
+            //NOTE(review): the tag parameter is not part of the grouped file names, so calls made
+            //with different tags appear to produce the same file names -- confirm this is intended.
+            string fileroot = outputDir + m->getRootName(m->getSimpleName(countfile));
+            for (map<string, CountTable*>::iterator it3 = countTableMap.begin(); it3 != countTableMap.end(); it3++) {
+                map<string, string> variables;
+                variables["[filename]"] = fileroot;
+                variables["[tag]"] = it3->first;
+                string filename = getOutputFileName("count",variables);
+                outputNames.push_back(filename); outputTypes["count"].push_back(filename);
+                (it3->second)->printTable(filename);
+                delete it3->second;
+            }
+        }
+
+        return 0;
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SplitAbundCommand", "parseCount");
+        exit(1);
+    }
+}
+/**********************************************************************************************************************/
int SplitAbundCommand::writeNames() { //namefile
try {
ofstream aout;
ofstream rout;
- string rare = outputDir + m->getRootName(m->getSimpleName(namefile)) + "rare.names";
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(namefile));
+ variables["[tag]"] = "rare";
+ string rare = getOutputFileName("name", variables);
m->openOutputFile(rare, rout);
outputNames.push_back(rare); outputTypes["name"].push_back(rare);
- string abund = outputDir + m->getRootName(m->getSimpleName(namefile)) + "abund.names";
+ variables["[tag]"] = "abund";
+ string abund = getOutputFileName("name", variables);
m->openOutputFile(abund, aout);
outputNames.push_back(abund); outputTypes["name"].push_back(abund);
temp2 = new ofstream;
filehandles[Groups[i]+".abund"] = temp2;
- m->openOutputFile(fileroot + Groups[i] + ".rare.names", *(filehandles[Groups[i]+".rare"]));
- m->openOutputFile(fileroot + Groups[i] + ".abund.names", *(filehandles[Groups[i]+".abund"]));
+ map<string, string> variables;
+ variables["[filename]"] = fileroot;
+ variables["[tag]"] = "rare";
+ variables["[group]"] = Groups[i];
+ string rareGroupFileName = getOutputFileName("name",variables);
+ variables["[tag]"] = "abund";
+ string abundGroupFileName = getOutputFileName("name",variables);
+ m->openOutputFile(rareGroupFileName, *(filehandles[Groups[i]+".rare"]));
+ m->openOutputFile(abundGroupFileName, *(filehandles[Groups[i]+".abund"]));
}
for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
map<string, string>::iterator itout;
for (int i = 0; i < names.size(); i++) {
- string group = groupMap->getGroup(names[i]);
+ string group = groupMap.getGroup(names[i]);
if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
itout = outputStrings.find(group+rareAbund);
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
(*(filehandles[it3->first])).close();
- outputNames.push_back(fileroot + it3->first + ".names"); outputTypes["name"].push_back(fileroot + it3->first + ".names");
+ map<string, string> variables;
+ variables["[filename]"] = fileroot;
+ variables["[tag]"] = it3->first;
+ outputNames.push_back(getOutputFileName("name",variables)); outputTypes["name"].push_back(getOutputFileName("name",variables));
delete it3->second;
}
}
ofstream aout;
ofstream rout;
-
- string rare = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag + "rare.accnos";
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputFile));
+ variables["[tag]"] = tag;
+ variables["[tag2]"] = "rare";
+ string rare = getOutputFileName("accnos",variables);
m->openOutputFile(rare, rout);
outputNames.push_back(rare); outputTypes["accnos"].push_back(rare);
}
rout.close();
- string abund = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag + "abund.accnos";
+ variables["[tag2]"] = "abund";
+ string abund = getOutputFileName("accnos",variables);
m->openOutputFile(abund, aout);
outputNames.push_back(abund); outputTypes["accnos"].push_back(abund);
temp2 = new ofstream;
filehandles[Groups[i]+".abund"] = temp2;
- m->openOutputFile(fileroot + tag + Groups[i] + ".rare.accnos", *(filehandles[Groups[i]+".rare"]));
- m->openOutputFile(fileroot + tag + Groups[i] + ".abund.accnos", *(filehandles[Groups[i]+".abund"]));
+ map<string, string> variables;
+ variables["[filename]"] = fileroot;
+ variables["[tag]"] = tag;
+ variables["[tag2]"] = "rare";
+ variables["[group]"] = Groups[i];
+ m->openOutputFile(getOutputFileName("accnos",variables), *(filehandles[Groups[i]+".rare"]));
+ variables["[tag2]"] = "abund";
+ m->openOutputFile(getOutputFileName("accnos",variables), *(filehandles[Groups[i]+".abund"]));
}
//write rare
for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
- string group = groupMap->getGroup(*itRare);
+ string group = groupMap.getGroup(*itRare);
if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
*(filehandles[group+".rare"]) << *itRare << endl;
//write abund
for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
- string group = groupMap->getGroup(*itAbund);
+ string group = groupMap.getGroup(*itAbund);
if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
*(filehandles[group+".abund"]) << *itAbund << endl;
//close files
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
(*(filehandles[it3->first])).close();
- outputNames.push_back(fileroot + tag + it3->first + ".accnos"); outputTypes["accnos"].push_back(fileroot + tag + it3->first + ".accnos");
+ map<string, string> variables;
+ variables["[filename]"] = fileroot;
+ variables["[tag]"] = tag;
+ variables["[tag2]"] = it3->first;
+ outputNames.push_back(getOutputFileName("accnos",variables)); outputTypes["accnos"].push_back(getOutputFileName("accnos",variables));
delete it3->second;
}
}
ofstream aout;
ofstream rout;
- string rare = outputDir + m->getRootName(m->getSimpleName(groupfile)) + tag + "rare.groups";
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(groupfile));
+ variables["[tag]"] = tag;
+ variables["[tag2]"] = "rare";
+ string rare = getOutputFileName("group",variables);
m->openOutputFile(rare, rout);
outputNames.push_back(rare); outputTypes["group"].push_back(rare);
- string abund = outputDir + m->getRootName(m->getSimpleName(groupfile)) + tag + "abund.groups";
+ variables["[tag2]"] = "abund";
+ string abund = getOutputFileName("group",variables);
+;
m->openOutputFile(abund, aout);
outputNames.push_back(abund); outputTypes["group"].push_back(abund);
for (int i = 0; i < names.size(); i++) {
- string group = groupMap->getGroup(names[i]);
+ string group = groupMap.getGroup(names[i]);
if (group == "not found") {
m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
temp2 = new ofstream;
filehandles[Groups[i]+".abund"] = temp2;
- m->openOutputFile(fileroot + tag + Groups[i] + ".rare.groups", *(filehandles[Groups[i]+".rare"]));
- m->openOutputFile(fileroot + tag + Groups[i] + ".abund.groups", *(filehandles[Groups[i]+".abund"]));
+ map<string, string> variables;
+ variables["[filename]"] = fileroot;
+ variables["[tag]"] = tag;
+ variables["[tag2]"] = "rare";
+ variables["[group]"] = Groups[i];
+ m->openOutputFile(getOutputFileName("group",variables), *(filehandles[Groups[i]+".rare"]));
+ variables["[tag2]"] = "abund";
+ m->openOutputFile(getOutputFileName("group",variables), *(filehandles[Groups[i]+".abund"]));
}
for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
for (int i = 0; i < names.size(); i++) {
- string group = groupMap->getGroup(names[i]);
+ string group = groupMap.getGroup(names[i]);
if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
*(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
(*(filehandles[it3->first])).close();
- outputNames.push_back(fileroot + tag + it3->first + ".groups"); outputTypes["group"].push_back(fileroot + tag + it3->first + ".groups");
+ map<string, string> variables;
+ variables["[filename]"] = fileroot;
+ variables["[tag]"] = tag;
+ variables["[tag2]"] = it3->first;
+ outputNames.push_back(getOutputFileName("group",variables)); outputTypes["group"].push_back(getOutputFileName("group",variables));
delete it3->second;
}
}
ofstream aout;
ofstream rout;
- string rare = outputDir + m->getRootName(m->getSimpleName(fastafile)) + tag + "rare.fasta";
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
+ variables["[tag]"] = tag;
+ variables["[tag2]"] = "rare";
+ string rare = getOutputFileName("fasta",variables);
m->openOutputFile(rare, rout);
outputNames.push_back(rare); outputTypes["fasta"].push_back(rare);
- string abund = outputDir + m->getRootName(m->getSimpleName(fastafile)) + tag + "abund.fasta";
+ variables["[tag2]"] = "abund";
+ string abund = getOutputFileName("fasta",variables);
m->openOutputFile(abund, aout);
outputNames.push_back(abund); outputTypes["fasta"].push_back(abund);
itNames = nameMap.find(seq.getName());
if (itNames == nameMap.end()) {
- m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
+ m->mothurOut(seq.getName() + " is not in your names or list file, ignoring."); m->mothurOutEndLine();
}else{
if (rareNames.count(seq.getName()) != 0) { //you are a rare name
seq.printSequence(rout);
temp2 = new ofstream;
filehandles[Groups[i]+".abund"] = temp2;
- m->openOutputFile(fileroot + tag + Groups[i] + ".rare.fasta", *(filehandles[Groups[i]+".rare"]));
- m->openOutputFile(fileroot + tag + Groups[i] + ".abund.fasta", *(filehandles[Groups[i]+".abund"]));
+ map<string, string> variables;
+ variables["[filename]"] = fileroot;
+ variables["[tag]"] = tag;
+ variables["[tag2]"] = "rare";
+ variables["[group]"] = Groups[i];
+ m->openOutputFile(getOutputFileName("fasta",variables), *(filehandles[Groups[i]+".rare"]));
+ variables["[tag2]"] = "abund";
+ m->openOutputFile(getOutputFileName("fasta",variables), *(filehandles[Groups[i]+".abund"]));
}
//open input file
map<string, string>::iterator itNames = nameMap.find(seq.getName());
if (itNames == nameMap.end()) {
- m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
+ m->mothurOut(seq.getName() + " is not in your names or list file, ignoring."); m->mothurOutEndLine();
}else{
vector<string> names;
m->splitAtComma(itNames->second, names); //parses bin into individual sequence names
}else{ //you are a abund name
rareAbund = ".abund";
}
-
- for (int i = 0; i < names.size(); i++) {
-
- string group = groupMap->getGroup(seq.getName());
-
- if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
- seq.printSequence(*(filehandles[group+rareAbund]));
- }else if(group == "not found") {
- m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
- }
- }
+
+ if (countfile == "") {
+ for (int i = 0; i < names.size(); i++) {
+ string group = groupMap.getGroup(seq.getName());
+
+ if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
+ seq.printSequence(*(filehandles[group+rareAbund]));
+ }else if(group == "not found") {
+ m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
+ }
+ }
+ }else {
+ vector<string> thisSeqsGroups = ct.getGroups(names[0]); //we only need names[0], because there is no namefile
+ for (int i = 0; i < thisSeqsGroups.size(); i++) {
+ if (m->inUsersGroups(thisSeqsGroups[i], Groups)) { //only add if this is in a group we want
+ seq.printSequence(*(filehandles[thisSeqsGroups[i]+rareAbund]));
+ }
+ }
+ }
}
}
}
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
(*(filehandles[it3->first])).close();
- outputNames.push_back(fileroot + tag + it3->first + ".fasta"); outputTypes["fasta"].push_back(fileroot + tag + it3->first + ".fasta");
+ map<string, string> variables;
+ variables["[filename]"] = fileroot;
+ variables["[tag]"] = tag;
+ variables["[tag2]"] = it3->first;
+ outputNames.push_back(getOutputFileName("fasta",variables)); outputTypes["fasta"].push_back(getOutputFileName("fasta",variables));
delete it3->second;
}
}