A73DDC3813C4BF64006AAE38 /* mothurmetastats.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73DDC3713C4BF64006AAE38 /* mothurmetastats.cpp */; };
A74A9A9F148E881E00AB5E3E /* spline.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74A9A9E148E881E00AB5E3E /* spline.cpp */; };
A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */; };
+ A74D59A4159A1E2000043046 /* counttable.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D59A3159A1E2000043046 /* counttable.cpp */; };
A754149714840CF7005850D1 /* summaryqualcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A754149614840CF7005850D1 /* summaryqualcommand.cpp */; };
A75790591301749D00A30DAB /* homovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75790581301749D00A30DAB /* homovacommand.cpp */; };
A76CDD821510F143004C8458 /* prcseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A76CDD811510F143004C8458 /* prcseqscommand.cpp */; };
A74A9A9E148E881E00AB5E3E /* spline.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = spline.cpp; sourceTree = "<group>"; };
A74D36B6137DAFAA00332B0C /* chimerauchimecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimerauchimecommand.h; sourceTree = "<group>"; };
A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimerauchimecommand.cpp; sourceTree = "<group>"; };
+ A74D59A3159A1E2000043046 /* counttable.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = counttable.cpp; sourceTree = "<group>"; };
+ A74D59A6159A1E3600043046 /* counttable.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = counttable.h; sourceTree = "<group>"; };
A754149514840CF7005850D1 /* summaryqualcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = summaryqualcommand.h; sourceTree = "<group>"; };
A754149614840CF7005850D1 /* summaryqualcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = summaryqualcommand.cpp; sourceTree = "<group>"; };
A75790571301749D00A30DAB /* homovacommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = homovacommand.h; sourceTree = "<group>"; };
A7E9B66312D37EC400DA6239 /* blastalign.hpp */,
A7E9B66412D37EC400DA6239 /* blastdb.cpp */,
A7E9B66512D37EC400DA6239 /* blastdb.hpp */,
+ A74D59A6159A1E3600043046 /* counttable.h */,
+ A74D59A3159A1E2000043046 /* counttable.cpp */,
+ A7E9B6CD12D37EC400DA6239 /* distancedb.cpp */,
A7E9B6BD12D37EC400DA6239 /* database.cpp */,
A7E9B6BE12D37EC400DA6239 /* database.hpp */,
A7E9B6BF12D37EC400DA6239 /* datavector.hpp */,
- A7E9B6CD12D37EC400DA6239 /* distancedb.cpp */,
A7E9B6CE12D37EC400DA6239 /* distancedb.hpp */,
A7E9B6DE12D37EC400DA6239 /* fastamap.cpp */,
A7E9B6DF12D37EC400DA6239 /* fastamap.h */,
A70056E6156A93D000924A2D /* getotulabelscommand.cpp in Sources */,
A70056EB156AB6E500924A2D /* removeotulabelscommand.cpp in Sources */,
A73901081588C40900ED2ED6 /* loadlogfilecommand.cpp in Sources */,
+ A74D59A4159A1E2000043046 /* counttable.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"MOTHUR_FILES=\"\\\"../release\\\"\"",
- "VERSION=\"\\\"1.25.0\\\"\"",
- "RELEASE_DATE=\"\\\"5/01/2012\\\"\"",
+ "VERSION=\"\\\"1.26.0\\\"\"",
+ "RELEASE_DATE=\"\\\"7/9/2012\\\"\"",
);
"GCC_VERSION[arch=*]" = "";
GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
GCC_MODEL_TUNING = "";
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
- "VERSION=\"\\\"1.25.0\\\"\"",
- "RELEASE_DATE=\"\\\"4/30/2012\\\"\"",
+ "VERSION=\"\\\"1.26.0\\\"\"",
+ "RELEASE_DATE=\"\\\"7/9/2012\\\"\"",
);
GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
commands["remove.otulabels"] = "remove.otulabels";
commands["make.contigs"] = "make.contigs";
commands["load.logfile"] = "load.logfile";
+ commands["make.table"] = "make.table";
commands["quit"] = "MPIEnabled";
}
else if(commandName == "make.shared") { command = new SharedCommand(optionString); }
else if(commandName == "get.commandinfo") { command = new GetCommandInfoCommand(optionString); }
else if(commandName == "deunique.tree") { command = new DeuniqueTreeCommand(optionString); }
- else if(commandName == "count.seqs") { command = new CountSeqsCommand(optionString); }
+ else if((commandName == "count.seqs") || (commandName == "make.table")) { command = new CountSeqsCommand(optionString); }
else if(commandName == "count.groups") { command = new CountGroupsCommand(optionString); }
else if(commandName == "clear.memory") { command = new ClearMemoryCommand(optionString); }
else if(commandName == "summary.tax") { command = new SummaryTaxCommand(optionString); }
else if(commandName == "make.shared") { pipecommand = new SharedCommand(optionString); }
else if(commandName == "get.commandinfo") { pipecommand = new GetCommandInfoCommand(optionString); }
else if(commandName == "deunique.tree") { pipecommand = new DeuniqueTreeCommand(optionString); }
- else if(commandName == "count.seqs") { pipecommand = new CountSeqsCommand(optionString); }
+ else if((commandName == "count.seqs") || (commandName == "make.table")) { pipecommand = new CountSeqsCommand(optionString); }
else if(commandName == "count.groups") { pipecommand = new CountGroupsCommand(optionString); }
else if(commandName == "clear.memory") { pipecommand = new ClearMemoryCommand(optionString); }
else if(commandName == "summary.tax") { pipecommand = new SummaryTaxCommand(optionString); }
else if(commandName == "make.shared") { shellcommand = new SharedCommand(); }
else if(commandName == "get.commandinfo") { shellcommand = new GetCommandInfoCommand(); }
else if(commandName == "deunique.tree") { shellcommand = new DeuniqueTreeCommand(); }
- else if(commandName == "count.seqs") { shellcommand = new CountSeqsCommand(); }
+ else if((commandName == "count.seqs") || (commandName == "make.table")) { shellcommand = new CountSeqsCommand(); }
else if(commandName == "count.groups") { shellcommand = new CountGroupsCommand(); }
else if(commandName == "clear.memory") { shellcommand = new ClearMemoryCommand(); }
else if(commandName == "summary.tax") { shellcommand = new SummaryTaxCommand(); }
try {
CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pname);
CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
+ CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
string CountSeqsCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The count.seqs command reads a name file and outputs a .seq.count file. You may also provide a group file to get the counts broken down by group.\n";
+ helpString += "The count.seqs aka. make.table command reads a name file and outputs a .count.table file. You may also provide a group file to get the counts broken down by group.\n";
helpString += "The groups parameter allows you to indicate which groups you want to include in the counts, by default all groups in your groupfile are used.\n";
+ helpString += "The large parameter indicates the name and group files are too large to fit in RAM.\n";
helpString += "When you use the groups parameter and a sequence does not represent any sequences from the groups you specify it is not included in the .count.summary file.\n";
helpString += "The count.seqs command should be in the following format: count.seqs(name=yourNameFile).\n";
- helpString += "Example count.seqs(name=amazon.names).\n";
+ helpString += "Example count.seqs(name=amazon.names) or make.table(name=amazon.names).\n";
helpString += "Note: No spaces between parameter labels (i.e. name), '=' and parameters (i.e.yourNameFile).\n";
return helpString;
}
it = outputTypes.find(type);
if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
else {
- if (type == "summary") { outputFileName = "seq.count"; }
+ if (type == "counttable") { outputFileName = "count.table"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
}
return outputFileName;
abort = true; calledHelp = true;
setParameters();
vector<string> tempOutNames;
- outputTypes["summary"] = tempOutNames;
+ outputTypes["counttable"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "CountSeqsCommand", "CountSeqsCommand");
//initialize outputTypes
vector<string> tempOutNames;
- outputTypes["summary"] = tempOutNames;
+ outputTypes["counttable"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
groups = validParameter.validFile(parameters, "groups", false);
if (groups == "not found") { groups = "all"; }
m->splitAtDash(groups, Groups);
+
+ string temp = validParameter.validFile(parameters, "large", false); if (temp == "not found") { temp = "F"; }
+ large = m->isTrue(temp);
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(namefile); }
if (abort == true) { if (calledHelp) { return 0; } return 2; }
- ofstream out;
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("summary");
- m->openOutputFile(outputFileName, out); outputTypes["summary"].push_back(outputFileName);
- out << "Representative_Sequence\ttotal\t";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("counttable");
+
+ int total = 0;
+ if (!large) { total = processSmall(outputFileName); }
+ else { total = processLarge(outputFileName); }
+
+ if (m->control_pressed) { m->mothurRemove(outputFileName); return 0; }
+
+ //set rabund file as new current rabundfile
+ itTypes = outputTypes.find("counttable");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { string current = (itTypes->second)[0]; m->setCountTableFile(current); }
+ }
+
+ m->mothurOutEndLine();
+ m->mothurOut("Total number of sequences: " + toString(total)); m->mothurOutEndLine();
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Name: "); m->mothurOutEndLine();
+ m->mothurOut(outputFileName); m->mothurOutEndLine();
+ m->mothurOutEndLine();
- GroupMap* groupMap;
+ return 0;
+ }
+
+ catch(exception& e) {
+ m->errorOut(e, "CountSeqsCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+int CountSeqsCommand::processSmall(string outputFileName){
+ try {
+ ofstream out;
+ m->openOutputFile(outputFileName, out); outputTypes["counttable"].push_back(outputFileName);
+ outputNames.push_back(outputFileName); outputTypes["counttable"].push_back(outputFileName);
+ out << "Representative_Sequence\ttotal\t";
+
+ GroupMap* groupMap;
if (groupfile != "") {
groupMap = new GroupMap(groupfile); groupMap->readMap();
if (m->control_pressed) { break; }
string firstCol, secondCol;
- in >> firstCol >> secondCol; m->gobble(in);
+ in >> firstCol; m->gobble(in); in >> secondCol; m->gobble(in);
vector<string> names;
m->splitAtChar(secondCol, names, ',');
total += names.size();
}
in.close();
+ out.close();
if (groupfile != "") { delete groupMap; }
+
+ return total;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountSeqsCommand", "processSmall");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+int CountSeqsCommand::processLarge(string outputFileName){
+ try {
+ set<string> namesOfGroups;
+ map<string, int> initial;
+ for (set<string>::iterator it = namesOfGroups.begin(); it != namesOfGroups.end(); it++) { initial[(*it)] = 0; }
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+ outputNames.push_back(outputFileName); outputTypes["counttable"].push_back(outputFileName);
+ out << "Representative_Sequence\ttotal\t";
+ if (groupfile == "") { out << endl; }
+
+ map<string, unsigned long long> namesToIndex;
+ string outfile = m->getRootName(groupfile) + "sorted.groups.temp";
+ string outName = m->getRootName(namefile) + "sorted.name.temp";
+ map<int, string> indexToName;
+ map<int, string> indexToGroup;
+ if (groupfile != "") {
+ time_t estart = time(NULL);
+ //convert name file to redundant -> unique. set unique name equal to index so we can use vectors, save name for later.
+ string newNameFile = m->getRootName(namefile) + ".name.temp";
+ string newGroupFile = m->getRootName(groupfile) + ".group.temp";
+ indexToName = processNameFile(newNameFile);
+ indexToGroup = getGroupNames(newGroupFile, namesOfGroups);
+
+ //sort file by first column so the names of sequences will be easier to find
+ //use the unix sort
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+ string command = "sort -n " + newGroupFile + " -o " + outfile;
+ system(command.c_str());
+ command = "sort -n " + newNameFile + " -o " + outName;
+ system(command.c_str());
+ #else //sort using windows sort
+ string command = "sort " + newGroupFile + " /O " + outfile;
+ system(command.c_str());
+ command = "sort " + newNameFile + " /O " + outName;
+ system(command.c_str());
+ #endif
+ m->mothurRemove(newNameFile);
+ m->mothurRemove(newGroupFile);
+
+ m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to sort and index the group and name files. "); m->mothurOutEndLine();
+ }else { outName = namefile; }
+
+ time_t estart = time(NULL);
+ //open input file
+ ifstream in;
+ m->openInputFile(outName, in);
+
+ //open input file
+ ifstream in2;
- if (m->control_pressed) { m->mothurRemove(outputFileName); return 0; }
+ int total = 0;
+ vector< vector<int> > nameMapCount;
+ if (groupfile != "") {
+ m->openInputFile(outfile, in2);
+ nameMapCount.resize(indexToName.size());
+ for (int i = 0; i < nameMapCount.size(); i++) {
+ nameMapCount[i].resize(indexToGroup.size(), 0);
+ }
+ }
+
+ while (!in.eof()) {
+ if (m->control_pressed) { break; }
+
+ string firstCol;
+ in >> firstCol; m->gobble(in);
+
+ if (groupfile != "") {
+ int uniqueIndex;
+ in >> uniqueIndex; m->gobble(in);
+
+ string name; int groupIndex;
+ in2 >> name >> groupIndex; m->gobble(in2);
+
+ if (name != firstCol) { m->mothurOut("[ERROR]: found " + name + " in your groupfile, but " + firstCol + " was in your namefile, please correct.\n"); m->control_pressed = true; }
+
+ nameMapCount[uniqueIndex][groupIndex]++;
+ total++;
+ }else {
+ string secondCol;
+ in >> secondCol; m->gobble(in);
+ int num = m->getNumNames(secondCol);
+ out << firstCol << '\t' << num << endl;
+ total += num;
+ }
+ }
+ in.close();
+
+ if (groupfile != "") {
+ m->mothurRemove(outfile);
+ m->mothurRemove(outName);
+ in2.close();
+ for (map<int, string>::iterator it = indexToGroup.begin(); it != indexToGroup.end(); it++) { out << it->second << '\t'; }
+ out << endl;
+ for (int i = 0; i < nameMapCount.size(); i++) {
+ string totalsLine = "";
+ int seqTotal = 0;
+ for (int j = 0; j < nameMapCount[i].size(); j++) {
+ seqTotal += nameMapCount[i][j];
+ totalsLine += toString(nameMapCount[i][j]) + '\t';
+ }
+ out << indexToName[i] << '\t' << seqTotal << '\t' << totalsLine << endl;
+ }
+ }
+
+ out.close();
+
+ m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to create the count table file. "); m->mothurOutEndLine();
+
+ return total;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountSeqsCommand", "processLarge");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+map<int, string> CountSeqsCommand::processNameFile(string name) {
+ try {
+ map<int, string> indexToNames;
+
+ ofstream out;
+ m->openOutputFile(name, out);
+
+ //open input file
+ ifstream in;
+ m->openInputFile(namefile, in);
+
+ string rest = "";
+ char buffer[4096];
+ bool pairDone = false;
+ bool columnOne = true;
+ string firstCol, secondCol;
+ int count = 0;
+
+ while (!in.eof()) {
+ if (m->control_pressed) { break; }
+
+ in.read(buffer, 4096);
+ vector<string> pieces = m->splitWhiteSpace(rest, buffer, in.gcount());
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ //parse names into vector
+ vector<string> theseNames;
+ m->splitAtComma(secondCol, theseNames);
+ for (int i = 0; i < theseNames.size(); i++) { out << theseNames[i] << '\t' << count << endl; }
+ indexToNames[count] = firstCol;
+ pairDone = false;
+ count++;
+ }
+ }
+ }
+ in.close();
+ out.close();
- m->mothurOutEndLine();
- m->mothurOut("Total number of sequences: " + toString(total)); m->mothurOutEndLine();
- m->mothurOutEndLine();
- m->mothurOut("Output File Name: "); m->mothurOutEndLine();
- m->mothurOut(outputFileName); m->mothurOutEndLine();
- m->mothurOutEndLine();
+ return indexToNames;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountSeqsCommand", "processNameFile");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+map<int, string> CountSeqsCommand::getGroupNames(string filename, set<string>& namesOfGroups) {
+ try {
+ map<int, string> indexToGroups;
+ map<string, int> groupIndex;
+ map<string, int>::iterator it;
+
+ ofstream out;
+ m->openOutputFile(filename, out);
+
+ //open input file
+ ifstream in;
+ m->openInputFile(groupfile, in);
+
+ string rest = "";
+ char buffer[4096];
+ bool pairDone = false;
+ bool columnOne = true;
+ string firstCol, secondCol;
+ int count = 0;
+
+ while (!in.eof()) {
+ if (m->control_pressed) { break; }
+
+ in.read(buffer, 4096);
+ vector<string> pieces = m->splitWhiteSpace(rest, buffer, in.gcount());
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ it = groupIndex.find(secondCol);
+ if (it == groupIndex.end()) { //add group, assigning the group and number so we can use vectors above
+ groupIndex[secondCol] = count;
+ count++;
+ }
+ out << firstCol << '\t' << groupIndex[secondCol] << endl;
+ namesOfGroups.insert(secondCol);
+ pairDone = false;
+ }
+ }
+ }
+ in.close();
+ out.close();
- return 0;
+ for (it = groupIndex.begin(); it != groupIndex.end(); it++) { indexToGroups[it->second] = it->first; }
+
+ return indexToGroups;
}
-
catch(exception& e) {
- m->errorOut(e, "CountSeqsCommand", "execute");
+ m->errorOut(e, "CountSeqsCommand", "getGroupNames");
exit(1);
}
}
//**********************************************************************************************************************
+
+
+
private:
string namefile, groupfile, outputDir, groups;
- bool abort;
- vector<string> Groups;
+ bool abort, large;
+ vector<string> Groups, outputNames;
+
+ int processSmall(string);
+ int processLarge(string);
+ map<int, string> processNameFile(string);
+ map<int, string> getGroupNames(string, set<string>&);
+
};
#endif
--- /dev/null
+//
+// counttable.cpp
+// Mothur
+//
+// Created by Sarah Westcott on 6/26/12.
+// Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "counttable.h"
+
+
+/************************************************************/
+int CountTable::readTable(string file) {
+ try {
+ filename = file;
+ ifstream in;
+ m->openInputFile(filename, in);
+
+ string headers = m->getline(in); m->gobble(in);
+ vector<string> columnHeaders = m->splitWhiteSpace(headers);
+
+ int numGroups = 0;
+ groups.clear();
+ totalGroups.clear();
+ indexGroupMap.clear();
+ indexNameMap.clear();
+ counts.clear();
+ map<int, string> originalGroupIndexes;
+ if (columnHeaders.size() > 2) { hasGroups = true; numGroups = columnHeaders.size() - 2; }
+ for (int i = 2; i < columnHeaders.size(); i++) { groups.push_back(columnHeaders[i]); originalGroupIndexes[i-2] = columnHeaders[i]; totalGroups.push_back(0); }
+ //sort groups to keep consistent with how we store the groups in groupmap
+ sort(groups.begin(), groups.end());
+ for (int i = 0; i < groups.size(); i++) { indexGroupMap[groups[i]] = i; }
+ m->setAllGroups(groups);
+
+ bool error = false;
+ string name;
+ int thisTotal;
+ uniques = 0;
+ total = 0;
+ while (!in.eof()) {
+
+ if (m->control_pressed) { break; }
+
+ in >> name >> thisTotal; m->gobble(in);
+
+ //if group info, then read it
+ vector<int> groupCounts; groupCounts.resize(numGroups, 0);
+ for (int i = 0; i < numGroups; i++) { int thisIndex = indexGroupMap[originalGroupIndexes[i]]; in >> groupCounts[thisIndex]; m->gobble(in); totalGroups[thisIndex] += groupCounts[thisIndex]; }
+
+ map<string, int>::iterator it = indexNameMap.find(name);
+ if (it == indexNameMap.end()) {
+ if (hasGroups) { counts.push_back(groupCounts); }
+ indexNameMap[name] = uniques;
+ totals.push_back(thisTotal);
+ total += thisTotal;
+ uniques++;
+ }else {
+ error = true;
+ m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + name + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();
+ }
+ }
+ in.close();
+
+ if (error) { m->control_pressed = true; }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountTable", "readTable");
+ exit(1);
+ }
+}
+/************************************************************/
+//group counts for a seq
+vector<int> CountTable::getGroupCounts(string seqName) {
+ try {
+ vector<int> temp;
+ if (hasGroups) {
+ map<string, int>::iterator it = indexNameMap.find(seqName);
+ if (it == indexNameMap.end()) {
+ m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+ }else {
+ temp = counts[it->second];
+ }
+ }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; }
+
+ return temp;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountTable", "getGroupCounts");
+ exit(1);
+ }
+}
+/************************************************************/
+//total number of sequences for the group
+int CountTable::getGroupCount(string groupName) {
+ try {
+ if (hasGroups) {
+ map<string, int>::iterator it = indexGroupMap.find(groupName);
+ if (it == indexGroupMap.end()) {
+ m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+ }else {
+ return totalGroups[it->second];
+ }
+ }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountTable", "getGroupCount");
+ exit(1);
+ }
+}
+/************************************************************/
+//total number of sequences for the seq for the group
+int CountTable::getGroupCount(string seqName, string groupName) {
+ try {
+ if (hasGroups) {
+ map<string, int>::iterator it = indexGroupMap.find(groupName);
+ if (it == indexGroupMap.end()) {
+ m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+ }else {
+ map<string, int>::iterator it2 = indexNameMap.find(seqName);
+ if (it2 == indexNameMap.end()) {
+ m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+ }else {
+ return counts[it2->second][it->second];
+ }
+ }
+ }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountTable", "getGroupCount");
+ exit(1);
+ }
+}
+/************************************************************/
+//total number of seqs represented by seq
+int CountTable::getNumSeqs(string seqName) {
+ try {
+
+ map<string, int>::iterator it = indexNameMap.find(seqName);
+ if (it == indexNameMap.end()) {
+ m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+ }else {
+ return totals[it->second];
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountTable", "getNumSeqs");
+ exit(1);
+ }
+}
+/************************************************************/
+//returns names of seqs
+vector<string> CountTable::getNamesOfSeqs() {
+ try {
+ vector<string> names;
+ for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) {
+ names.push_back(it->first);
+ }
+
+ return names;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountTable", "getNamesOfSeqs");
+ exit(1);
+ }
+}
+/************************************************************/
+//returns names of seqs
+int CountTable::mergeCounts(string seq1, string seq2) {
+ try {
+ map<string, int>::iterator it = indexNameMap.find(seq1);
+ if (it == indexNameMap.end()) {
+ m->mothurOut("[ERROR]: " + seq1 + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+ }else {
+ map<string, int>::iterator it2 = indexNameMap.find(seq2);
+ if (it2 == indexNameMap.end()) {
+ m->mothurOut("[ERROR]: " + seq2 + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+ }else {
+ //merge data
+ for (int i = 0; i < groups.size(); i++) {
+ counts[it->second][i] += counts[it2->second][i];
+ counts[it2->second][i] = 0;
+ }
+ totals[it->second] += totals[it2->second];
+ totals[it2->second] = 0;
+ uniques--;
+ indexNameMap.erase(it2);
+ }
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountTable", "getNamesOfSeqs");
+ exit(1);
+ }
+}
+
+/************************************************************/
+
+
--- /dev/null
+#ifndef Mothur_counttable_h
+#define Mothur_counttable_h
+
+
+//
+// counttable.h
+// Mothur
+//
+// Created by Sarah Westcott on 6/26/12.
+// Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+//This class is designed to read a count table file and store its data.
+//count table files look like:
+
+/*
+ Representative_Sequence total F003D000 F003D002 F003D004 F003D006 F003D008 F003D142 F003D144 F003D146 F003D148 F003D150 MOCK.GQY1XT001
+ GQY1XT001C296C 6051 409 985 923 937 342 707 458 439 387 464 0
+ GQY1XT001A3TJI 4801 396 170 413 442 306 769 581 576 497 651 0
+ GQY1XT001CS2B8 3018 263 226 328 460 361 336 248 290 187 319 0
+ GQY1XT001CD9IB 2736 239 177 256 405 306 286 263 248 164 392 0
+
+ or if no group info was used to create it
+
+ Representative_Sequence total
+ GQY1XT001C296C 6051
+ GQY1XT001A3TJI 4801
+ GQY1XT001CS2B8 3018
+ GQY1XT001CD9IB 2736
+ GQY1XT001ARCB1 2183
+ GQY1XT001CNF2P 2796
+ GQY1XT001CJMDA 1667
+ GQY1XT001CBVJB 3758
+
+
+ */
+
+
+#include "mothurout.h"
+
+class CountTable {
+
+ public:
+
+ CountTable() { m = MothurOut::getInstance(); hasGroups = false; total = 0; }
+ ~CountTable() {}
+
+ int readTable(string);
+
+ bool hasGroupInfo() { return hasGroups; }
+ int getNumGroups() { return groups.size(); }
+ vector<string> getNamesOfGroups() { return groups; } //returns group names, if no group info vector is blank.
+
+ vector<int> getGroupCounts(string); //returns group counts for a seq passed in, if no group info is in file vector is blank. Order is the same as the groups returned by getGroups function.
+ int getGroupCount(string, string); //returns number of seqs for that group for that seq
+ int getGroupCount(string); // returns total seqs for that group
+ int getNumSeqs(string); //returns total seqs for that seq
+ int getNumSeqs() { return total; } //return total number of seqs
+ int getNumUniqueSeqs() { return uniques; } //return number of unique/representative seqs
+ int getGroupIndex(string); //returns index in getGroupCounts vector of specific group
+ vector<string> getNamesOfSeqs();
+ int mergeCounts(string, string); //combines counts for 2 seqs, saving under the first name passed in.
+
+ private:
+ string filename;
+ MothurOut* m;
+ bool hasGroups;
+ int total, uniques;
+ vector<string> groups;
+ vector< vector<int> > counts;
+ vector<int> totals;
+ vector<int> totalGroups;
+ map<string, int> indexNameMap;
+ map<string, int> indexGroupMap;
+
+};
+
+#endif
CommandParameter pfasta("repfasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
CommandParameter pname("repname", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pname);
CommandParameter pcontaxonomy("contaxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pcontaxonomy);
- CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
+ CommandParameter plist("list", "InputTypes", "", "", "ListShared", "ListShared", "none",false,false); parameters.push_back(plist);
+ CommandParameter pshared("shared", "InputTypes", "", "", "ListShared", "ListShared", "none",false,false); parameters.push_back(pshared);
CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
string CreateDatabaseCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The create.database command reads a listfile, *.cons.taxonomy, *.rep.fasta, *.rep.names and optional groupfile, and creates a database file.\n";
- helpString += "The create.database command parameters are repfasta, list, repname, contaxonomy, group and label. List, repfasta, repnames, and contaxonomy are required.\n";
+ helpString += "The create.database command reads a list file or a shared file, *.cons.taxonomy, *.rep.fasta, *.rep.names and optional groupfile, and creates a database file.\n";
+ helpString += "The create.database command parameters are repfasta, list, shared, repname, contaxonomy, group and label. List, repfasta, repnames, and contaxonomy are required.\n";
helpString += "The repfasta file is fasta file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
helpString += "The repname file is the name file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile).\n";
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["group"] = inputDir + it->second; }
}
+
+ it = parameters.find("shared");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["shared"] = inputDir + it->second; }
+ }
}
//check for required parameters
listfile = validParameter.validFile(parameters, "list", true);
- if (listfile == "not found") {
- //if there is a current list file, use it
+ if (listfile == "not found") { listfile = ""; }
+ else if (listfile == "not open") { listfile = ""; abort = true; }
+ else { m->setListFile(listfile); }
+
+ sharedfile = validParameter.validFile(parameters, "shared", true);
+ if (sharedfile == "not found") { sharedfile = ""; }
+ else if (sharedfile == "not open") { sharedfile = ""; abort = true; }
+ else { m->setSharedFile(sharedfile); }
+
+ if ((sharedfile == "") && (listfile == "")) {
+ //is there are current file available for either of these?
+ //give priority to list, then shared
listfile = m->getListFile();
if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
- else { m->mothurOut("You have no current listfile and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
+ else {
+ sharedfile = m->getSharedFile();
+ if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("No valid current files. You must provide a shared or list file before you can use the create.database command."); m->mothurOutEndLine();
+ abort = true;
+ }
+ }
}
- else if (listfile == "not open") { abort = true; }
- else { m->setListFile(listfile); }
+ else if ((sharedfile != "") && (listfile != "")) { m->mothurOut("When executing a create.database command you must enter ONLY ONE of the following: shared or list."); m->mothurOutEndLine(); abort = true; }
+
+ if (sharedfile != "") { if (outputDir == "") { outputDir = m->hasPath(sharedfile); } }
+ else { if (outputDir == "") { outputDir = m->hasPath(listfile); } }
contaxonomyfile = validParameter.validFile(parameters, "contaxonomy", true);
if (contaxonomyfile == "not found") { //if there is a current list file, use it
//taxonomies holds the taxonomy info for each Otu
//classifyOtuSizes holds the size info of each Otu to help with error checking
vector<string> taxonomies;
- vector<int> classifyOtuSizes = readTax(taxonomies);
+ vector<string> otuLabels;
+ vector<int> classifyOtuSizes = readTax(taxonomies, otuLabels);
if (m->control_pressed) { return 0; }
//names redundants to uniques. backwards to how we normally do it, but each bin is the list file will be a key entry in the map.
map<string, string> repNames;
- int numUniqueNamesFile = m->readNames(repnamesfile, repNames);
+ int numUniqueNamesFile = m->readNames(repnamesfile, repNames, 1);
//are there the same number of otus in the fasta and name files
if (repOtusSizes.size() != numUniqueNamesFile) { m->mothurOut("[ERROR]: you have " + toString(numUniqueNamesFile) + " unique seqs in your repname file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file. These should match.\n"); m->control_pressed = true; }
if (m->control_pressed) { return 0; }
- //at this point we are fairly sure the repfasta, repnames and contaxonomy files match so lets proceed with the listfile
- ListVector* list = getList();
-
- if (m->control_pressed) { delete list; return 0; }
- GroupMap* groupmap = NULL;
- if (groupfile != "") {
- groupmap = new GroupMap(groupfile);
- groupmap->readMap();
- }
-
- if (m->control_pressed) { delete list; if (groupfile != "") { delete groupmap; } return 0; }
-
- if (outputDir == "") { outputDir += m->hasPath(listfile); }
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("database");
+ string outputFileName = "";
+ if (listfile != "") { outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("database"); }
+ else { outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + getOutputFileNameTag("database"); }
outputNames.push_back(outputFileName); outputTypes["database"].push_back(outputFileName);
ofstream out;
m->openOutputFile(outputFileName, out);
string header = "OTUNumber\tAbundance\t";
- if (groupfile != "") {
- header = "OTUNumber\t";
- for (int i = 0; i < groupmap->getNamesOfGroups().size(); i++) { header += (groupmap->getNamesOfGroups())[i] + '\t'; }
- }
- header += "repSeqName\trepSeq\tOTUConTaxonomy";
- out << header << endl;
+
- for (int i = 0; i < list->getNumBins(); i++) {
+ if (listfile != "") {
+ //at this point we are fairly sure the repfasta, repnames and contaxonomy files match so lets proceed with the listfile
+ ListVector* list = getList();
- if (m->control_pressed) { break; }
+ if (otuLabels.size() != list->getNumBins()) {
+ m->mothurOut("[ERROR]: you have " + toString(otuLabels.size()) + " otus in your contaxonomy file, but your list file has " + toString(list->getNumBins()) + " otus. These should match. Make sure you are using files for the same distance.\n"); m->control_pressed = true; }
- out << (i+1) << '\t';
+ if (m->control_pressed) { delete list; return 0; }
- vector<string> binNames;
- string bin = list->get(i);
-
- map<string, string>::iterator it = repNames.find(bin);
- if (it == repNames.end()) {
- m->mothurOut("[ERROR: OTU " + toString(i+1) + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->control_pressed = true; break;
+ GroupMap* groupmap = NULL;
+ if (groupfile != "") {
+ groupmap = new GroupMap(groupfile);
+ groupmap->readMap();
}
- m->splitAtComma(bin, binNames);
+ if (m->control_pressed) { delete list; if (groupfile != "") { delete groupmap; } return 0; }
- //sanity check
- if (binNames.size() != classifyOtuSizes[i]) {
- m->mothurOut("[ERROR: OTU " + toString(i+1) + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[i]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true; break;
+ if (groupfile != "") {
+ header = "OTUNumber\t";
+ for (int i = 0; i < groupmap->getNamesOfGroups().size(); i++) { header += (groupmap->getNamesOfGroups())[i] + '\t'; }
}
+ header += "repSeqName\trepSeq\tOTUConTaxonomy";
+ out << header << endl;
- //output abundances
- if (groupfile != "") {
- string groupAbunds = "";
- map<string, int> counts;
- //initialize counts to 0
- for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { counts[(groupmap->getNamesOfGroups())[j]] = 0; }
+ for (int i = 0; i < list->getNumBins(); i++) {
+
+ if (m->control_pressed) { break; }
+
+ out << otuLabels[i] << '\t';
+
+ vector<string> binNames;
+ string bin = list->get(i);
- //find abundances by group
- bool error = false;
- for (int j = 0; j < binNames.size(); j++) {
- string group = groupmap->getGroup(binNames[j]);
- if (group == "not found") {
- m->mothurOut("[ERROR]: " + binNames[j] + " is not in your groupfile, please correct.\n");
- error = true;
- }else { counts[group]++; }
+ map<string, string>::iterator it = repNames.find(bin);
+ if (it == repNames.end()) {
+ m->mothurOut("[ERROR: OTU " + otuLabels[i] + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->control_pressed = true; break;
}
- //output counts
- for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { out << counts[(groupmap->getNamesOfGroups())[j]] << '\t'; }
+ m->splitAtComma(bin, binNames);
- if (error) { m->control_pressed = true; }
- }else { out << binNames.size() << '\t'; }
+ //sanity check
+ if (binNames.size() != classifyOtuSizes[i]) {
+ m->mothurOut("[ERROR: OTU " + otuLabels[i] + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[i]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true; break;
+ }
+
+ //output abundances
+ if (groupfile != "") {
+ string groupAbunds = "";
+ map<string, int> counts;
+ //initialize counts to 0
+ for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { counts[(groupmap->getNamesOfGroups())[j]] = 0; }
+
+ //find abundances by group
+ bool error = false;
+ for (int j = 0; j < binNames.size(); j++) {
+ string group = groupmap->getGroup(binNames[j]);
+ if (group == "not found") {
+ m->mothurOut("[ERROR]: " + binNames[j] + " is not in your groupfile, please correct.\n");
+ error = true;
+ }else { counts[group]++; }
+ }
+
+ //output counts
+ for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { out << counts[(groupmap->getNamesOfGroups())[j]] << '\t'; }
+
+ if (error) { m->control_pressed = true; }
+ }else { out << binNames.size() << '\t'; }
+
+ //output repSeq
+ out << it->second << '\t' << seqs[i].getAligned() << '\t' << taxonomies[i] << endl;
+ }
+
+
+ delete list;
+ if (groupfile != "") { delete groupmap; }
+
+ }else {
+ vector<SharedRAbundVector*> lookup = getShared();
+
+ header = "OTUNumber\t";
+ for (int i = 0; i < lookup.size(); i++) { header += lookup[i]->getGroup() + '\t'; }
+ header += "repSeqName\trepSeq\tOTUConTaxonomy";
+ out << header << endl;
- //output repSeq
- out << it->second << '\t' << seqs[i].getAligned() << '\t' << taxonomies[i] << endl;
+ for (int h = 0; h < lookup[0]->getNumBins(); h++) {
+
+ if (m->control_pressed) { break; }
+
+ int index = findIndex(otuLabels, m->currentBinLabels[h]);
+ if (index == -1) { m->mothurOut("[ERROR]: " + m->currentBinLabels[h] + " is not in your constaxonomy file, aborting.\n"); m->control_pressed = true; }
+
+ if (m->control_pressed) { break; }
+
+ out << otuLabels[index] << '\t';
+
+ int totalAbund = 0;
+ for (int i = 0; i < lookup.size(); i++) {
+ int abund = lookup[i]->getAbundance(h);
+ totalAbund += abund;
+ out << abund << '\t';
+ }
+
+ //sanity check
+ if (totalAbund != classifyOtuSizes[index]) {
+ m->mothurOut("[ERROR: OTU " + m->currentBinLabels[h] + " contains " + toString(totalAbund) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true; break;
+ }
+
+ //output repSeq
+ out << seqs[index].getName() << '\t' << seqs[index].getAligned() << '\t' << taxonomies[index] << endl;
+ }
}
out.close();
-
- delete list;
- if (groupfile != "") { delete groupmap; }
-
if (m->control_pressed) { m->mothurRemove(outputFileName); return 0; }
m->mothurOutEndLine();
}
}
//**********************************************************************************************************************
-vector<int> CreateDatabaseCommand::readTax(vector<string>& taxonomies){
+int CreateDatabaseCommand::findIndex(vector<string>& otuLabels, string label){
+ try {
+ int index = -1;
+ for (int i = 0; i < otuLabels.size(); i++) {
+ if (otuLabels[i] == label) { index = i; break; }
+ }
+ return index;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CreateDatabaseCommand", "findIndex");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+vector<int> CreateDatabaseCommand::readTax(vector<string>& taxonomies, vector<string>& otuLabels){
try {
vector<int> sizes;
sizes.push_back(size);
taxonomies.push_back(tax);
+ otuLabels.push_back(otu);
}
in.close();
}
}
//**********************************************************************************************************************
+vector<SharedRAbundVector*> CreateDatabaseCommand::getShared(){
+ try {
+ InputData input(sharedfile, "sharedfile");
+ vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
+ string lastLabel = lookup[0]->getLabel();
+
+ if (label == "") { label = lastLabel; return lookup; }
+
+ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+ set<string> labels; labels.insert(label);
+ set<string> processedLabels;
+ set<string> userLabels = labels;
+
+ //as long as you are not at the end of the file or done wih the lines you want
+ while((lookup[0] != NULL) && (userLabels.size() != 0)) {
+ if (m->control_pressed) { return lookup; }
+
+ if(labels.count(lookup[0]->getLabel()) == 1){
+ processedLabels.insert(lookup[0]->getLabel());
+ userLabels.erase(lookup[0]->getLabel());
+ break;
+ }
+
+ if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+ string saveLabel = lookup[0]->getLabel();
+
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ lookup = input.getSharedRAbundVectors(lastLabel);
+
+ processedLabels.insert(lookup[0]->getLabel());
+ userLabels.erase(lookup[0]->getLabel());
+
+ //restore real lastlabel to save below
+ lookup[0]->setLabel(saveLabel);
+ break;
+ }
+
+ lastLabel = lookup[0]->getLabel();
+
+ //get next line to process
+ //prevent memory leak
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ lookup = input.getSharedRAbundVectors();
+ }
+
+
+ if (m->control_pressed) { return lookup; }
+
+ //output error messages about any remaining user labels
+ set<string>::iterator it;
+ bool needToRun = false;
+ for (it = userLabels.begin(); it != userLabels.end(); it++) {
+ m->mothurOut("Your file does not include the label " + *it);
+ if (processedLabels.count(lastLabel) != 1) {
+ m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+ needToRun = true;
+ }else {
+ m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+ }
+ }
+
+ //run last label if you need to
+ if (needToRun == true) {
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ lookup = input.getSharedRAbundVectors(lastLabel);
+ }
+
+ return lookup;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CreateDatabaseCommand", "getList");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
private:
bool abort;
- string listfile, groupfile, repfastafile, repnamesfile, contaxonomyfile, label, outputDir;
+ string sharedfile, listfile, groupfile, repfastafile, repnamesfile, contaxonomyfile, label, outputDir;
vector<string> outputNames;
vector<int> readFasta(vector<Sequence>&);
- vector<int> readTax(vector<string>&);
+ vector<int> readTax(vector<string>&, vector<string>&);
ListVector* getList();
+ vector<SharedRAbundVector*> getShared();
+ int findIndex(vector<string>&, string);
};
CommandParameter pcolumn("column", "InputTypes", "", "", "none", "none", "OldFastaColumn",false,false); parameters.push_back(pcolumn);
CommandParameter poldfasta("oldfasta", "InputTypes", "", "", "none", "none", "OldFastaColumn",false,false); parameters.push_back(poldfasta);
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
- CommandParameter poutput("output", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(poutput);
+ CommandParameter poutput("output", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(poutput);
CommandParameter pcalc("calc", "Multiple", "nogaps-eachgap-onegap", "onegap", "", "", "",false,false); parameters.push_back(pcalc);
CommandParameter pcountends("countends", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pcountends);
CommandParameter pcompress("compress", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcompress);
convert(temp, compress);
output = validParameter.validFile(parameters, "output", false); if(output == "not found"){ output = "column"; }
+ if (output == "phylip") { output = "lt"; }
if (((column != "") && (oldfastafile == "")) || ((column == "") && (oldfastafile != ""))) { m->mothurOut("If you provide column or oldfasta, you must provide both."); m->mothurOutEndLine(); abort=true; }
int GetCoreMicroBiomeCommand::createTable(vector<SharedRAbundFloatVector*>& lookup){
try {
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + getOutputFileNameTag("coremicrobiome");
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + "." + getOutputFileNameTag("coremicrobiome");
outputNames.push_back(outputFileName); outputTypes["coremicrobiome"].push_back(outputFileName);
ofstream out;
m->openOutputFile(outputFileName, out);
m->setFlowFile("");
}else if (types[i] == "biom") {
m->setBiomFile("");
+ }else if (types[i] == "counttable") {
+ m->setCountTableFile("");
}else if (types[i] == "processors") {
m->setProcessors("1");
}else if (types[i] == "all") {
string helpString = "";
helpString += "The get.otulabels command can be used to select specific otus with the output from classify.otu, otu.association, or corr.axes.\n";
helpString += "The get.otulabels parameters are: constaxonomy, otucorr, corraxes, and accnos.\n";
- helpString += "The constaxonomy parameter is input the results of the classify.otu command.\n";
- helpString += "The otucorr parameter is input the results of the otu.association command.\n";
- helpString += "The corraxes parameter is input the results of the corr.axes command.\n";
+ helpString += "The constaxonomy parameter is used to input the results of the classify.otu command.\n";
+ helpString += "The otucorr parameter is used to input the results of the otu.association command.\n";
+ helpString += "The corraxes parameter is used to input the results of the corr.axes command.\n";
helpString += "The get.otulabels commmand should be in the following format: \n";
helpString += "get.otulabels(accnos=yourListOfOTULabels, corraxes=yourCorrAxesFile)\n";
return helpString;
string helpString = "";
helpString += "The make.contigs command reads a forward fastq file and a reverse fastq file and outputs new fasta and quality files.\n";
helpString += "The make.contigs command parameters are ffastq, rfastq, align, match, mismatch, gapopen, gapextend and processors.\n";
- helpString += "The ffastq and rfastq parameter is required.\n";
- helpString += "The align parameter allows you to specify the alignment method to use. Your options are: gotoh, needleman, blast and noalign. The default is needleman.\n";
+ helpString += "The ffastq and rfastq parameters are required.\n";
+ helpString += "The align parameter allows you to specify the alignment method to use. Your options are: gotoh and needleman. The default is needleman.\n";
helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
helpString += "The mistmatch parameter allows you to specify the penalty for having different bases. The default is -1.0.\n";
helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
string outFastaFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + getOutputFileNameTag("fasta");
string outQualFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + getOutputFileNameTag("qfile");
- string outMisMatchFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + getOutputFileNameTag("mismatches");
+ string outMisMatchFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + getOutputFileNameTag("mismatch");
outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
outputNames.push_back(outQualFile); outputTypes["qfile"].push_back(outQualFile);
outputNames.push_back(outMisMatchFile); outputTypes["mismatch"].push_back(outMisMatchFile);
CYGWIN_BUILD ?= no
USECOMPRESSION ?= no
MOTHUR_FILES="\"Enter_your_default_path_here\""
-RELEASE_DATE = "\"5/14/2012\""
-VERSION = "\"1.25.1\""
+RELEASE_DATE = "\"7/9/2012\""
+VERSION = "\"1.26.0\""
FORTAN_COMPILER = gfortran
FORTRAN_FLAGS =
types.insert("tree");
types.insert("flow");
types.insert("biom");
+ types.insert("counttable");
types.insert("processors");
return types;
if (treefile != "") { mothurOut("tree=" + treefile); mothurOutEndLine(); }
if (flowfile != "") { mothurOut("flow=" + flowfile); mothurOutEndLine(); }
if (biomfile != "") { mothurOut("biom=" + biomfile); mothurOutEndLine(); }
+ if (counttablefile != "") { mothurOut("counttable=" + counttablefile); mothurOutEndLine(); }
if (processors != "1") { mothurOut("processors=" + processors); mothurOutEndLine(); }
}
if (treefile != "") { return true; }
if (flowfile != "") { return true; }
if (biomfile != "") { return true; }
+ if (counttablefile != "") { return true; }
if (processors != "1") { return true; }
return hasCurrent;
taxonomyfile = "";
flowfile = "";
biomfile = "";
+ counttablefile = "";
processors = "1";
}
catch(exception& e) {
positions.push_back(0);
while(!in.eof()){
- //unsigned long long lastpos = in.tellg();
- //input = getline(in);
- //if (input.length() != 0) {
- //unsigned long long pos = in.tellg();
- //if (pos != -1) { positions.push_back(pos - input.length() - 1); }
- //else { positions.push_back(lastpos); }
- //}
- //gobble(in); //has to be here since windows line endings are 2 characters and mess up the positions
-
-
//getline counting reads
char d = in.get(); count++;
while ((d != '\n') && (d != '\r') && (d != '\f') && (d != in.eof())) {
for (int i = 0; i < size; i++) {
if (!isspace(buffer[i])) { rest += buffer[i]; }
else {
- pieces.push_back(rest); rest = "";
+ if (rest != "") { pieces.push_back(rest); rest = ""; }
while (i < size) { //gobble white space
if (isspace(buffer[i])) { i++; }
else { rest = buffer[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
for (int i = 0; i < input.length(); i++) {
if (!isspace(input[i])) { rest += input[i]; }
else {
- pieces.push_back(rest); rest = "";
+ if (rest != "") { pieces.push_back(rest); rest = ""; }
while (i < input.length()) { //gobble white space
if (isspace(input[i])) { i++; }
else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
}
}
/**********************************************************************************************************************/
+int MothurOut::readNames(string namefile, map<string, string>& nameMap, int flip) {
+ try {
+
+ //open input file
+ ifstream in;
+ openInputFile(namefile, in);
+
+ string rest = "";
+ char buffer[4096];
+ bool pairDone = false;
+ bool columnOne = true;
+ string firstCol, secondCol;
+
+ while (!in.eof()) {
+ if (control_pressed) { break; }
+
+ in.read(buffer, 4096);
+ vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ nameMap[secondCol] = firstCol;
+ pairDone = false;
+ }
+ }
+ }
+ in.close();
+
+ return nameMap.size();
+
+ }
+ catch(exception& e) {
+ errorOut(e, "MothurOut", "readNames");
+ exit(1);
+ }
+}
+/**********************************************************************************************************************/
int MothurOut::readNames(string namefile, map<string, string>& nameMap, map<string, int>& nameCount) {
try {
nameMap.clear(); nameCount.clear();
//map<string, string> names;
vector<string> binLabelsInFile;
vector<string> currentBinLabels;
- string saveNextLabel, argv, sharedHeaderMode;
+ string saveNextLabel, argv, sharedHeaderMode, groupMode;
bool printedHeaders, commandInputsConvertError;
//functions from mothur.h
int readNames(string, map<string, string>&, map<string, int>&);
int readNames(string, map<string, string>&);
int readNames(string, map<string, string>&, bool);
+ int readNames(string, map<string, string>&, int);
int readNames(string, map<string, vector<string> >&);
int readNames(string, vector<seqPriorityNode>&, map<string, string>&);
int mothurRemove(string);
string getTaxonomyFile() { return taxonomyfile; }
string getFlowFile() { return flowfile; }
string getBiomFile() { return biomfile; }
+ string getCountTableFile() { return counttablefile; }
string getProcessors() { return processors; }
void setListFile(string f) { listfile = getFullPathName(f); }
void setTreeFile(string f) { treefile = getFullPathName(f); }
- void setGroupFile(string f) { groupfile = getFullPathName(f); }
+ void setGroupFile(string f) { groupfile = getFullPathName(f); groupMode = "group"; }
void setPhylipFile(string f) { phylipfile = getFullPathName(f); }
void setColumnFile(string f) { columnfile = getFullPathName(f); }
void setNameFile(string f) { namefile = getFullPathName(f); }
void setTaxonomyFile(string f) { taxonomyfile = getFullPathName(f); }
void setFlowFile(string f) { flowfile = getFullPathName(f); }
void setBiomFile(string f) { biomfile = getFullPathName(f); }
+ void setCountTableFile(string f) { counttablefile = getFullPathName(f); groupMode = "count"; }
void setProcessors(string p) { processors = p; mothurOut("\nUsing " + toString(p) + " processors.\n"); }
void printCurrentFiles();
processors = "1";
flowfile = "";
biomfile = "";
+ counttablefile = "";
gui = false;
printedHeaders = false;
commandInputsConvertError = false;
mothurCalling = false;
debug = false;
sharedHeaderMode = "";
+ groupMode = "group";
}
~MothurOut();
string releaseDate, version;
string accnosfile, phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, designfile, taxonomyfile, biomfile;
- string orderfile, treefile, sharedfile, ordergroupfile, relabundfile, fastafile, qualfile, sfffile, oligosfile, processors, flowfile;
+ string orderfile, treefile, sharedfile, ordergroupfile, relabundfile, fastafile, qualfile, sfffile, oligosfile, processors, flowfile, counttablefile;
vector<string> Groups;
vector<string> namesOfGroups;
int rowIndex = 0;
while(fileHandle){
- fileHandle >> firstCol; //read from first column
+ fileHandle >> firstCol; m->gobble(fileHandle); //read from first column
fileHandle >> secondCol; //read from second column
itData = (*this).find(firstCol);
it->second = m->getTaxonomyFile();
}else if (it->first == "biom") {
it->second = m->getBiomFile();
+ }else if (it->first == "counttable") {
+ it->second = m->getCountTableFile();
}else {
m->mothurOut("[ERROR]: mothur does not save a current file for " + it->first); m->mothurOutEndLine();
}
CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen);
CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
- CommandParameter poutput("output", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(poutput);
+ CommandParameter poutput("output", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(poutput);
CommandParameter pcalc("calc", "Multiple", "nogaps-eachgap-onegap", "onegap", "", "", "",false,false); parameters.push_back(pcalc);
CommandParameter pcountends("countends", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pcountends);
CommandParameter pcompress("compress", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcompress);
align = validParameter.validFile(parameters, "align", false); if (align == "not found"){ align = "needleman"; }
output = validParameter.validFile(parameters, "output", false); if(output == "not found"){ output = "column"; }
+ if (output=="phylip") { output = "lt"; }
if ((output != "column") && (output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column."); m->mothurOutEndLine(); output = "column"; }
calc = validParameter.validFile(parameters, "calc", false);
#include "sharedsobs.h"
#include "sharednseqs.h"
#include "sharedutilities.h"
+#include "subsample.h"
//**********************************************************************************************************************
vector<string> RareFactSharedCommand::setParameters(){
CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
CommandParameter pcalc("calc", "Multiple", "sharednseqs-sharedobserved", "sharedobserved", "", "", "",true,false); parameters.push_back(pcalc);
+ CommandParameter psubsampleiters("subsampleiters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(psubsampleiters);
+ CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
CommandParameter pjumble("jumble", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pjumble);
CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
helpString += "Example rarefaction.shared(label=unique-0.01-0.03, iters=10000, groups=B-C, jumble=T, calc=sharedobserved).\n";
helpString += "The default values for iters is 1000, freq is 100, and calc is sharedobserved which calculates the shared rarefaction curve for the observed richness.\n";
+ helpString += "The subsampleiters parameter allows you to choose the number of times you would like to run the subsample.\n";
+ helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
helpString += "The default value for groups is all the groups in your groupfile, and jumble is true.\n";
helpString += validCalculator.printCalc("sharedrarefaction");
helpString += "The label parameter is used to analyze specific labels in your input.\n";
temp = validParameter.validFile(parameters, "groupmode", false); if (temp == "not found") { temp = "T"; }
groupMode = m->isTrue(temp);
+
+ temp = validParameter.validFile(parameters, "subsampleiters", false); if (temp == "not found") { temp = "1000"; }
+ m->mothurConvert(temp, iters);
+
+ temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
+ if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
+ else {
+ if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
+ else { subsample = false; }
+ }
+
+ if (subsample == false) { iters = 1; }
}
}
}
}
-
+
+ /******************************************************/
+ if (subsample) {
+ if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
+ subsampleSize = subset[0]->getNumSeqs();
+ for (int i = 1; i < subset.size(); i++) {
+ int thisSize = subset[i]->getNumSeqs();
+
+ if (thisSize < subsampleSize) { subsampleSize = thisSize; }
+ }
+ }else {
+ newGroups.clear();
+ vector<SharedRAbundVector*> temp;
+ for (int i = 0; i < subset.size(); i++) {
+ if (subset[i]->getNumSeqs() < subsampleSize) {
+ m->mothurOut(subset[i]->getGroup() + " contains " + toString(subset[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
+ delete subset[i];
+ }else {
+ newGroups.push_back(subset[i]->getGroup());
+ temp.push_back(subset[i]);
+ }
+ }
+ subset = temp;
+ }
+
+ if (subset.size() < 2) { m->mothurOut("You have not provided enough valid groups. I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
+ }
+ /******************************************************/
+
ValidCalculators validCalculator;
for (int i=0; i<Estimators.size(); i++) {
if (validCalculator.isValidCalculator("sharedrarefaction", Estimators[i]) == true) {
rCurve->getSharedCurve(freq, nIters);
delete rCurve;
+ if (subsample) { subsampleLookup(subset, fileNameRoot); }
+
processedLabels.insert(subset[0]->getLabel());
userLabels.erase(subset[0]->getLabel());
}
rCurve->getSharedCurve(freq, nIters);
delete rCurve;
+ if (subsample) { subsampleLookup(subset, fileNameRoot); }
+
processedLabels.insert(subset[0]->getLabel());
userLabels.erase(subset[0]->getLabel());
rCurve = new Rarefact(subset, rDisplays);
rCurve->getSharedCurve(freq, nIters);
delete rCurve;
+
+ if (subsample) { subsampleLookup(subset, fileNameRoot); }
+
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
}
}
}
//**********************************************************************************************************************
+int RareFactSharedCommand::subsampleLookup(vector<SharedRAbundVector*>& thisLookup, string fileNameRoot) {
+ try {
+
+ map<string, vector<string> > filenames;
+ for (int thisIter = 0; thisIter < iters; thisIter++) {
+
+ vector<SharedRAbundVector*> thisItersLookup = thisLookup;
+
+ //we want the summary results for the whole dataset, then the subsampling
+ SubSample sample;
+ vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
+
+ //make copy of lookup so we don't get access violations
+ vector<SharedRAbundVector*> newLookup;
+ for (int k = 0; k < thisItersLookup.size(); k++) {
+ SharedRAbundVector* temp = new SharedRAbundVector();
+ temp->setLabel(thisItersLookup[k]->getLabel());
+ temp->setGroup(thisItersLookup[k]->getGroup());
+ newLookup.push_back(temp);
+ }
+
+ //for each bin
+ for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
+ if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
+ for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
+ }
+
+ tempLabels = sample.getSample(newLookup, subsampleSize);
+ thisItersLookup = newLookup;
+
+
+ Rarefact* rCurve;
+ vector<Display*> rDisplays;
+
+ string thisfileNameRoot = fileNameRoot + toString(thisIter);
+
+ ValidCalculators validCalculator;
+ for (int i=0; i<Estimators.size(); i++) {
+ if (validCalculator.isValidCalculator("sharedrarefaction", Estimators[i]) == true) {
+ if (Estimators[i] == "sharedobserved") {
+ rDisplays.push_back(new RareDisplay(new SharedSobs(), new SharedThreeColumnFile(thisfileNameRoot+getOutputFileNameTag("sharedrarefaction"), "")));
+ filenames["sharedrarefaction"].push_back(thisfileNameRoot+getOutputFileNameTag("sharedrarefaction"));
+ }else if (Estimators[i] == "sharednseqs") {
+ rDisplays.push_back(new RareDisplay(new SharedNSeqs(), new SharedThreeColumnFile(thisfileNameRoot+getOutputFileNameTag("sharedr_nseqs"), "")));
+ filenames["sharedr_nseqs"].push_back(thisfileNameRoot+getOutputFileNameTag("sharedr_nseqs"));
+ }
+ }
+ }
+
+ rCurve = new Rarefact(thisItersLookup, rDisplays);
+ rCurve->getSharedCurve(freq, nIters);
+ delete rCurve;
+
+ //clean up memory
+ for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
+ thisItersLookup.clear();
+ for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
+ }
+
+ //create std and ave outputs
+ vector< vector< vector< double > > > results; //iter -> numSampled -> data
+ for (map<string, vector<string> >::iterator it = filenames.begin(); it != filenames.end(); it++) {
+ vector<string> thisTypesFiles = it->second;
+ vector<string> columnHeaders;
+ for (int i = 0; i < thisTypesFiles.size(); i++) {
+ ifstream in;
+ m->openInputFile(thisTypesFiles[i], in);
+
+ string headers = m->getline(in); m->gobble(in);
+ columnHeaders = m->splitWhiteSpace(headers);
+ int numCols = columnHeaders.size();
+
+ vector<vector<double> > thisFilesLines;
+ while (!in.eof()) {
+ if (m->control_pressed) { break; }
+ vector<double> data; data.resize(numCols, 0);
+ //read numSampled line
+ for (int j = 0; j < numCols; j++) { in >> data[j]; m->gobble(in); }
+ thisFilesLines.push_back(data);
+ }
+ in.close();
+ results.push_back(thisFilesLines);
+ m->mothurRemove(thisTypesFiles[i]);
+ }
+
+ if (!m->control_pressed) {
+ //process results
+ string outputFile = fileNameRoot + "ave-std." + thisLookup[0]->getLabel() + "." + getOutputFileNameTag(it->first);
+ ofstream out;
+ m->openOutputFile(outputFile, out);
+ outputNames.push_back(outputFile); outputTypes[it->first].push_back(outputFile);
+
+ out << columnHeaders[0] << '\t' << "method\t";
+ for (int i = 1; i < columnHeaders.size(); i++) { out << columnHeaders[i] << '\t'; }
+ out << endl;
+
+ vector< vector<double> > aveResults; aveResults.resize(results[0].size());
+ for (int i = 0; i < aveResults.size(); i++) { aveResults[i].resize(results[0][i].size(), 0.0); }
+
+ for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
+ for (int i = 0; i < aveResults.size(); i++) { //initialize sums to zero.
+ aveResults[i][0] = results[thisIter][i][0];
+ for (int j = 1; j < aveResults[i].size(); j++) {
+ aveResults[i][j] += results[thisIter][i][j];
+ }
+ }
+ }
+
+ for (int i = 0; i < aveResults.size(); i++) { //finds average.
+ for (int j = 1; j < aveResults[i].size(); j++) {
+ aveResults[i][j] /= (float) iters;
+ }
+ }
+
+ //standard deviation
+ vector< vector<double> > stdResults; stdResults.resize(results[0].size());
+ for (int i = 0; i < stdResults.size(); i++) { stdResults[i].resize(results[0][i].size(), 0.0); }
+
+ for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
+ for (int i = 0; i < stdResults.size(); i++) {
+ stdResults[i][0] = aveResults[i][0];
+ for (int j = 1; j < stdResults[i].size(); j++) {
+ stdResults[i][j] += ((results[thisIter][i][j] - aveResults[i][j]) * (results[thisIter][i][j] - aveResults[i][j]));
+ }
+ }
+ }
+
+ for (int i = 0; i < stdResults.size(); i++) { //finds average.
+ out << aveResults[i][0] << '\t' << "ave\t";
+ for (int j = 1; j < aveResults[i].size(); j++) { out << aveResults[i][j] << '\t'; }
+ out << endl;
+ out << stdResults[i][0] << '\t' << "std\t";
+ for (int j = 1; j < stdResults[i].size(); j++) {
+ stdResults[i][j] /= (float) iters;
+ stdResults[i][j] = sqrt(stdResults[i][j]);
+ out << stdResults[i][j] << '\t';
+ }
+ out << endl;
+ }
+ out.close();
+ }
+ }
+
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "RareFactSharedCommand", "subsample");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
vector<string> RareFactSharedCommand::createGroupFile(vector<string>& outputNames) {
try {
private:
vector<SharedRAbundVector*> lookup;
- int nIters;
+ int nIters, subsampleSize, iters;
string format;
float freq;
map<int, string> file2Group; //index in outputNames[i] -> group
- bool abort, allLines, jumble, groupMode;
+ bool abort, allLines, jumble, groupMode, subsample;
set<string> labels; //holds labels to be used
string label, calc, groups, outputDir, sharedfile, designfile;
vector<string> Estimators, Groups, outputNames, Sets;
int process(GroupMap&, string);
vector<string> createGroupFile(vector<string>&);
+ int subsampleLookup(vector<SharedRAbundVector*>&, string);
};
}
vector<string> tempOutNames;
- outputTypes["contaxonomy"] = tempOutNames;
+ outputTypes["constaxonomy"] = tempOutNames;
outputTypes["otucorr"] = tempOutNames;
outputTypes["corraxes"] = tempOutNames;
#include "sharedcommand.h"
#include "sharedutilities.h"
+#include "counttable.h"
//********************************************************************************************************************
//sorts lowest to highest
try {
CommandParameter pbiom("biom", "InputTypes", "", "", "BiomListGroup", "BiomListGroup", "none",false,false); parameters.push_back(pbiom);
CommandParameter plist("list", "InputTypes", "", "", "BiomListGroup", "BiomListGroup", "ListGroup",false,false); parameters.push_back(plist);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "ListGroup",false,false); parameters.push_back(pgroup);
+ CommandParameter pcount("count", "InputTypes", "", "", "", "GroupCount", "",false,false); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "none", "GroupCount", "ListGroup",false,false); parameters.push_back(pgroup);
//CommandParameter pordergroup("ordergroup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pordergroup);
CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
try {
string helpString = "";
helpString += "The make.shared command reads a list and group file or a biom file and creates a shared file. If a list and group are provided a rabund file is created for each group.\n";
- helpString += "The make.shared command parameters are list, group, biom, groups and label. list and group are required unless a current file is available or you provide a biom file.\n";
+ helpString += "The make.shared command parameters are list, group, biom, groups, count and label. list and group or count are required unless a current file is available or you provide a biom file.\n";
+ helpString += "The count parameter allows you to provide a count file containing the group info for the list file.\n";
helpString += "The groups parameter allows you to indicate which groups you want to include, group names should be separated by dashes. ex. groups=A-B-C. Default is all groups in your groupfile.\n";
helpString += "The label parameter is only valid with the list and group option and allows you to indicate which labels you want to include, label names should be separated by dashes. Default is all labels in your list file.\n";
//helpString += "The ordergroup parameter allows you to indicate the order of the groups in the sharedfile, by default the groups are listed alphabetically.\n";
if (path == "") { parameters["group"] = inputDir + it->second; }
}
- /*it = parameters.find("ordergroup");
+ it = parameters.find("count");
//user has given a template file
if(it != parameters.end()){
path = m->hasPath(it->second);
//if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["ordergroup"] = inputDir + it->second; }
- }*/
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
it = parameters.find("biom");
//user has given a template file
if (groupfile == "not open") { groupfile = ""; abort = true; }
else if (groupfile == "not found") { groupfile = ""; }
else { m->setGroupFile(groupfile); }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { countfile = ""; abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
if ((biomfile == "") && (listfile == "")) {
//is there are current file available for either of these?
else if ((biomfile != "") && (listfile != "")) { m->mothurOut("When executing a make.shared command you must enter ONLY ONE of the following: list or biom."); m->mothurOutEndLine(); abort = true; }
if (listfile != "") {
- if (groupfile == "") {
+ if ((groupfile == "") && (countfile == "")) {
groupfile = m->getGroupFile();
if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
else {
- m->mothurOut("You need to provide a groupfle if you are going to use the list format."); m->mothurOutEndLine();
- abort = true;
+ countfile = m->getCountTableFile();
+ if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("You need to provide a groupfile or countfile if you are going to use the list format."); m->mothurOutEndLine();
+ abort = true;
+ }
}
}
}
ifstream in;
m->openInputFile(biomfile, in);
-
- m->getline(in); m->gobble(in); //grab first '{'
-
+
string matrixFormat = "";
int numRows = 0;
int numCols = 0;
int shapeNumCols = 0;
vector<string> otuNames;
vector<string> groupNames;
- while (!in.eof()) {
-
+ map<string, string> fileLines;
+ vector<string> names;
+ int countOpenBrace = 0;
+ int countClosedBrace = 0;
+ int openParen = -1; //account for opening brace
+ int closeParen = 0;
+ bool ignoreCommas = false;
+ bool atComma = false;
+ string line = "";
+ string matrixElementType = "";
+
+ while (!in.eof()) { //split file by tags, so each "line" will have something like "id":"/Users/SarahsWork/Desktop/release/final.tx.1.subsample.1.pick.shared-1"
if (m->control_pressed) { break; }
- string line = m->getline(in); m->gobble(in);
+ char c = in.get(); m->gobble(in);
- string tag = getTag(line);
+ if (c == '[') { countOpenBrace++; }
+ else if (c == ']') { countClosedBrace++; }
+ else if (c == '{') { openParen++; }
+ else if (c == '}') { closeParen++; }
+ else if ((!ignoreCommas) && (c == ',')) { atComma = true; }
- if (tag == "type") {
- //check to make sure this is an OTU table
- string type = getTag(line);
- if (type != "OTU table") { m->mothurOut("[ERROR]: " + type + " is not a valid biom type for mothur. Only type allowed is OTU table.\n"); m->control_pressed = true; }
- }else if (tag == "matrix_type") {
- //get type and check type
- matrixFormat = getTag(line);
- if ((matrixFormat != "sparse") && (matrixFormat != "dense")) { m->mothurOut("[ERROR]: " + matrixFormat + " is not a valid biom matrix_type for mothur. Types allowed are sparse and dense.\n"); m->control_pressed = true; }
- }else if (tag == "matrix_element_type") {
- //get type and check type
- string matrixElementType = getTag(line);
- if (matrixElementType != "int") { m->mothurOut("[ERROR]: " + matrixElementType + " is not a valid matrix_element_type for mothur. Only type allowed is int.\n"); m->control_pressed = true; }
- }else if (tag == "rows") {
- //read otu names
- otuNames = readRows(line, in, numRows);
- }else if (tag == "columns") {
- //read sample names
- groupNames = readRows(line, in, numCols);
-
- //if users selected groups, then remove the groups not wanted.
- SharedUtil util;
- vector<string> Groups = m->getGroups();
- vector<string> allGroups = groupNames;
- util.setGroups(Groups, allGroups);
- m->setGroups(Groups);
-
- //fill filehandles with neccessary ofstreams
- int i;
- ofstream* temp;
- for (i=0; i<Groups.size(); i++) {
- temp = new ofstream;
- filehandles[Groups[i]] = temp;
- }
-
- //set fileroot
- fileroot = outputDir + m->getRootName(m->getSimpleName(biomfile));
-
- //clears file before we start to write to it below
- for (int i=0; i<Groups.size(); i++) {
- m->mothurRemove((fileroot + Groups[i] + ".rabund"));
- outputNames.push_back((fileroot + Groups[i] + ".rabund"));
- outputTypes["rabund"].push_back((fileroot + Groups[i] + ".rabund"));
- }
-
- }else if (tag == "shape") {
- getDims(line, shapeNumRows, shapeNumCols);
-
- //check shape
- if (shapeNumCols != numCols) {
- m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumCols) + " columns, but I only read " + toString(numCols) + " columns.\n"); m->control_pressed = true;
- }
-
- if (shapeNumRows != numRows) {
- m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumRows) + " rows, but I only read " + toString(numRows) + " rows.\n"); m->control_pressed = true;
+ if ((countOpenBrace != countClosedBrace) && (countOpenBrace != countClosedBrace)) { ignoreCommas = true; }
+ else if ((countOpenBrace == countClosedBrace) && (countOpenBrace == countClosedBrace)) { ignoreCommas = false; }
+ if (atComma && !ignoreCommas) {
+ if (fileLines.size() == 0) { //clip first {
+ line = line.substr(1);
}
- }else if (tag == "data") {
- m->currentBinLabels = otuNames;
+ string tag = getTag(line);
+ fileLines[tag] = line;
+ line = "";
+ atComma = false;
+ ignoreCommas = false;
- //read data
- vector<SharedRAbundVector*> lookup = readData(matrixFormat, line, in, groupNames, otuNames.size());
-
- m->mothurOutEndLine(); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
- lookup[0]->printHeaders(out);
- printSharedData(lookup, out);
- }
+ }else { line += c; }
+
+ }
+ if (line != "") {
+ line = line.substr(0, line.length()-1);
+ string tag = getTag(line);
+ fileLines[tag] = line;
}
in.close();
-
+ map<string, string>::iterator it;
+ it = fileLines.find("type");
+ if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a type provided.\n"); }
+ else {
+ string thisLine = it->second;
+ string type = getTag(thisLine);
+ if ((type != "OTU table") && (type != "OTUtable")) { m->mothurOut("[ERROR]: " + type + " is not a valid biom type for mothur. Only type allowed is OTU table.\n"); m->control_pressed = true; }
+ }
+
+ if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
+
+ it = fileLines.find("matrix_type");
+ if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_type provided.\n"); }
+ else {
+ string thisLine = it->second;
+ matrixFormat = getTag(thisLine);
+ if ((matrixFormat != "sparse") && (matrixFormat != "dense")) { m->mothurOut("[ERROR]: " + matrixFormat + " is not a valid biom matrix_type for mothur. Types allowed are sparse and dense.\n"); m->control_pressed = true; }
+ }
+
+ if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
+
+ it = fileLines.find("matrix_element_type");
+ if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_element_type provided.\n"); }
+ else {
+ string thisLine = it->second;
+ matrixElementType = getTag(thisLine);
+ if ((matrixElementType != "int") && (matrixElementType != "float")) { m->mothurOut("[ERROR]: " + matrixElementType + " is not a valid biom matrix_element_type for mothur. Types allowed are int and float.\n"); m->control_pressed = true; }
+ if (matrixElementType == "float") { m->mothurOut("[WARNING]: the shared file only uses integers, any float values will be rounded down to the nearest integer.\n"); }
+ }
+
+ if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
+
+ it = fileLines.find("rows");
+ if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a rows provided.\n"); }
+ else {
+ string thisLine = it->second;
+ otuNames = readRows(thisLine, numRows);
+ }
+
+ if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
+
+ it = fileLines.find("columns");
+ if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a columns provided.\n"); }
+ else {
+ string thisLine = it->second;
+ //read sample names
+ groupNames = readRows(thisLine, numCols);
+
+ //if users selected groups, then remove the groups not wanted.
+ SharedUtil util;
+ vector<string> Groups = m->getGroups();
+ vector<string> allGroups = groupNames;
+ util.setGroups(Groups, allGroups);
+ m->setGroups(Groups);
+
+ //fill filehandles with neccessary ofstreams
+ int i;
+ ofstream* temp;
+ for (i=0; i<Groups.size(); i++) {
+ temp = new ofstream;
+ filehandles[Groups[i]] = temp;
+ }
+
+ //set fileroot
+ fileroot = outputDir + m->getRootName(m->getSimpleName(biomfile));
+
+ //clears file before we start to write to it below
+ for (int i=0; i<Groups.size(); i++) {
+ m->mothurRemove((fileroot + Groups[i] + ".rabund"));
+ outputNames.push_back((fileroot + Groups[i] + ".rabund"));
+ outputTypes["rabund"].push_back((fileroot + Groups[i] + ".rabund"));
+ }
+ }
+
+ if (m->control_pressed) { for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } out.close(); m->mothurRemove(filename); return 0; }
+
+ it = fileLines.find("shape");
+ if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a shape provided.\n"); }
+ else {
+ string thisLine = it->second;
+ getDims(thisLine, shapeNumRows, shapeNumCols);
+
+ //check shape
+ if (shapeNumCols != numCols) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumCols) + " columns, but I only read " + toString(numCols) + " columns.\n"); m->control_pressed = true; }
+
+ if (shapeNumRows != numRows) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumRows) + " rows, but I only read " + toString(numRows) + " rows.\n"); m->control_pressed = true; }
+ }
+
+ if (m->control_pressed) { for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } out.close(); m->mothurRemove(filename); return 0; }
+
+ it = fileLines.find("data");
+ if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a data provided.\n"); }
+ else {
+ string thisLine = it->second;
+ m->currentBinLabels = otuNames;
+
+ //read data
+ vector<SharedRAbundVector*> lookup = readData(matrixFormat, thisLine, matrixElementType, groupNames, otuNames.size());
+
+ m->mothurOutEndLine(); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+ lookup[0]->printHeaders(out);
+ printSharedData(lookup, out);
+ }
+
+ for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
+ out.close();
+
+ if (m->control_pressed) { m->mothurRemove(filename); return 0; }
+
return 0;
}
catch(exception& e) {
}
}
//**********************************************************************************************************************
-vector<SharedRAbundVector*> SharedCommand::readData(string matrixFormat, string line, ifstream& in, vector<string>& groupNames, int numOTUs) {
+vector<SharedRAbundVector*> SharedCommand::readData(string matrixFormat, string line, string matrixElementType, vector<string>& groupNames, int numOTUs) {
try {
vector<SharedRAbundVector*> lookup;
else if ((line[i] == ']') && (inBrackets)) {
inBrackets = false;
int temp;
- m->mothurConvert(num, temp);
+ float temp2;
+ if (matrixElementType == "float") { m->mothurConvert(num, temp2); temp = (int)temp2; }
+ else { m->mothurConvert(num, temp); }
nums.push_back(temp);
num = "";
}
}
- //same as above just reading from file.
- while (!in.eof()) {
-
- char c = in.get(); m->gobble(in);
-
- if (m->control_pressed) { return lookup; }
-
- //look for opening [ to indicate data is starting
- if ((c == '[') && (!dataStart)) { dataStart = true; c = in.get(); if (in.eof()) { break; } }
- else if ((c == ']') && dataStart && (!inBrackets)) { break; } //we are done reading data
-
- if (dataStart) {
- if ((c == '[') && (!inBrackets)) { inBrackets = true; c = in.get(); if (in.eof()) { break; } }
- else if ((c == ']') && (inBrackets)) {
- inBrackets = false;
- int temp;
- m->mothurConvert(num, temp);
- nums.push_back(temp);
- num = "";
-
- //save info to vectors
- if (matrixFormat == "dense") {
-
- //sanity check
- if (nums.size() != lookup.size()) { m->mothurOut("[ERROR]: trouble parsing OTU data. OTU " + toString(otuCount) + " causing errors.\n"); m->control_pressed = true; }
-
- //set abundances for this otu
- //nums contains [abundSample0, abundSample1, abundSample2, ...] for current OTU
- for (int j = 0; j < lookup.size(); j++) { lookup[j]->set(otuCount, nums[j], groupNames[j]); }
-
- otuCount++;
- }else {
- //sanity check
- if (nums.size() != 3) { m->mothurOut("[ERROR]: trouble parsing OTU data.\n"); m->control_pressed = true; }
-
- //nums contains [otuNum, sampleNum, abundance]
- lookup[nums[1]]->set(nums[0], nums[2], groupNames[nums[1]]);
- }
- nums.clear();
- }
-
- if (inBrackets) {
- if (c == ',') {
- int temp;
- m->mothurConvert(num, temp);
- nums.push_back(temp);
- num = "";
- }else { if (!isspace(c)) { num += c; } }
- }
- }
- }
-
SharedUtil util;
bool remove = false;
}
}
//**********************************************************************************************************************
-vector<string> SharedCommand::readRows(string line, ifstream& in, int& numRows) {
+vector<string> SharedCommand::readRows(string line, int& numRows) {
try {
/*"rows":[
{"id":"Otu01", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Porphyromonadaceae", "unclassified"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
}
}
- //keep reading
- if (!end) {
- while (!in.eof()) {
-
- if (m->control_pressed) { break; }
-
- char c = in.get(); m->gobble(in);
-
- if (c == '[') { countOpenBrace++; }
- else if (c == ']') { countClosedBrace++; }
- else if (c == '{') { openParen++; }
- else if (c == '}') { closeParen++; }
- else if (openParen != 0) { nextRow += c; } //you are reading the row info
-
-
- //you have reached the end of the rows info
- if ((countOpenBrace == countClosedBrace) && (countClosedBrace != 0)) { end = true; break; }
- if ((openParen == closeParen) && (closeParen != 0)) { //process row
- numRows++;
- vector<string> items;
- m->splitAtChar(nextRow, items, ','); //parse by comma, will return junk for metadata but we aren't using that anyway
- string part = items[0]; items.clear();
- m->splitAtChar(part, items, ':'); //split part we want containing the ids
- string name = items[1];
-
- //remove "" if needed
- int pos = name.find("\"");
- if (pos != string::npos) {
- string newName = "";
- for (int k = 0; k < name.length(); k++) {
- if (name[k] != '\"') { newName += name[k]; }
- }
- name = newName;
- }
- names.push_back(name);
- nextRow = "";
- openParen = 0;
- closeParen = 0;
- }
- }
- }
-
return names;
}
catch(exception& e) {
}
}
//**********************************************************************************************************************
-//designed for things like "type": "OTU table", returns map type -> OTU table
+//designed for things like "type": "OTU table", returns type
string SharedCommand::getTag(string& line) {
try {
bool inQuotes = false;
ofstream out;
m->openOutputFile(filename, out);
- GroupMap* groupMap = new GroupMap(groupfile);
+ GroupMap* groupMap = NULL;
+ CountTable* countTable = NULL;
+ if (groupfile != "") {
+ groupMap = new GroupMap(groupfile);
+
+ int groupError = groupMap->readMap();
+ if (groupError == 1) { delete groupMap; return 0; }
+ vector<string> allGroups = groupMap->getNamesOfGroups();
+ m->setAllGroups(allGroups);
+ }else{
+ countTable = new CountTable();
+ countTable->readTable(countfile);
+ }
- int groupError = groupMap->readMap();
- if (groupError == 1) { delete groupMap; return 0; }
- vector<string> allGroups = groupMap->getNamesOfGroups();
- m->setAllGroups(allGroups);
+ if (m->control_pressed) { return 0; }
pickedGroups = false;
//if hte user has not specified any groups then use them all
if (Groups.size() == 0) {
- Groups = groupMap->getNamesOfGroups(); m->setGroups(Groups);
+ if (groupfile != "") { Groups = groupMap->getNamesOfGroups(); }
+ else { Groups = countTable->getNamesOfGroups(); }
+ m->setGroups(Groups);
}else { pickedGroups = true; }
//fill filehandles with neccessary ofstreams
vector<SharedRAbundVector*> lookup;
if (m->control_pressed) {
- delete SharedList; delete groupMap;
+ delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
out.close(); m->mothurRemove(filename);
for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
}
//sanity check
- vector<string> groupMapNamesSeqs = groupMap->getNamesSeqs();
- int error = ListGroupSameSeqs(groupMapNamesSeqs, SharedList);
+ vector<string> namesSeqs;
+ int numGroupNames = 0;
+ if (m->groupMode == "group") { namesSeqs = groupMap->getNamesSeqs(); numGroupNames = groupMap->getNumSeqs(); }
+ else { namesSeqs = countTable->getNamesOfSeqs(); numGroupNames = countTable->getNumUniqueSeqs(); }
+ int error = ListGroupSameSeqs(namesSeqs, SharedList);
- if ((!pickedGroups) && (SharedList->getNumSeqs() != groupMap->getNumSeqs())) { //if the user has not specified any groups and their files don't match exit with error
- m->mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine();
+ if ((!pickedGroups) && (SharedList->getNumSeqs() != numGroupNames)) { //if the user has not specified any groups and their files don't match exit with error
+ m->mothurOut("Your group file contains " + toString(numGroupNames) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine();
- out.close();
- m->mothurRemove(filename); //remove blank shared file you made
-
- createMisMatchFile(SharedList, groupMap);
+ out.close(); m->mothurRemove(filename); //remove blank shared file you made
//delete memory
- for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
- delete it3->second;
- }
-
- delete SharedList; delete groupMap;
-
+ for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
+ delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
return 0;
}
if (error == 1) { m->control_pressed = true; }
//if user has specified groups make new groupfile for them
- if (pickedGroups) { //make new group file
+ if ((pickedGroups) && (m->groupMode == "group")) { //make new group file
string groups = "";
if (m->getNumGroups() < 4) {
for (int i = 0; i < m->getNumGroups(); i++) {
while((SharedList != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
if (m->control_pressed) {
- delete SharedList; delete groupMap;
+ delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
out.close(); m->mothurRemove(filename);
for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
}
if (m->control_pressed) {
- delete SharedList; delete groupMap;
+ delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
out.close(); m->mothurRemove(filename);
if (m->control_pressed) {
- delete SharedList; delete groupMap;
+ delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
out.close(); m->mothurRemove(filename);
}
if (m->control_pressed) {
- delete groupMap;
+ if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
out.close(); m->mothurRemove(filename);
for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
delete it3->second;
}
- delete groupMap;
+ if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
if (m->control_pressed) {
m->mothurRemove(filename);
}
}
//**********************************************************************************************************************
-int SharedCommand::createMisMatchFile(SharedListVector* SharedList, GroupMap* groupMap) {
- try {
- ofstream outMisMatch;
- string outputMisMatchName = outputDir + m->getRootName(m->getSimpleName(listfile));
-
- //you have sequences in your list file that are not in your group file
- if (SharedList->getNumSeqs() > groupMap->getNumSeqs()) {
- outputMisMatchName += "missing.group";
- m->mothurOut("For a list of names that are in your list file and not in your group file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
-
- m->openOutputFile(outputMisMatchName, outMisMatch);
-
- set<string> listNames;
- set<string>::iterator itList;
-
- //go through list and if group returns "not found" output it
- for (int i = 0; i < SharedList->getNumBins(); i++) {
- if (m->control_pressed) { outMisMatch.close(); m->mothurRemove(outputMisMatchName); return 0; }
-
- string names = SharedList->get(i);
-
- vector<string> binNames;
- m->splitAtComma(names, binNames);
-
- for (int j = 0; j < binNames.size(); j++) {
- string name = binNames[j];
- string group = groupMap->getGroup(name);
-
- if(group == "not found") { outMisMatch << name << endl; }
-
- itList = listNames.find(name);
- if (itList != listNames.end()) { m->mothurOut(name + " is in your list file more than once. Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
- else { listNames.insert(name); }
- }
- }
-
- outMisMatch.close();
-
-
- }else {//you have sequences in your group file that are not in you list file
-
- outputMisMatchName += "missing.name";
- m->mothurOut("For a list of names that are in your group file and not in your list file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
-
- map<string, string> namesInList;
- map<string, string>::iterator itList;
-
- //go through listfile and get names
- for (int i = 0; i < SharedList->getNumBins(); i++) {
- if (m->control_pressed) { return 0; }
-
-
- string names = SharedList->get(i);
-
- vector<string> binNames;
- m->splitAtComma(names, binNames);
-
- for (int j = 0; j < binNames.size(); j++) {
-
- string name = binNames[j];
-
- itList = namesInList.find(name);
- if (itList != namesInList.end()) { m->mothurOut(name + " is in your list file more than once. Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
-
- namesInList[name] = name;
-
- }
- }
-
- //get names of sequences in groupfile
- vector<string> seqNames = groupMap->getNamesSeqs();
-
- map<string, string>::iterator itMatch;
-
- m->openOutputFile(outputMisMatchName, outMisMatch);
-
- //loop through names in seqNames and if they aren't in namesIn list output them
- for (int i = 0; i < seqNames.size(); i++) {
- if (m->control_pressed) { outMisMatch.close(); m->mothurRemove(outputMisMatchName); return 0; }
-
- itMatch = namesInList.find(seqNames[i]);
-
- if (itMatch == namesInList.end()) {
-
- outMisMatch << seqNames[i] << endl;
- }
- }
- outMisMatch.close();
- }
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "SharedCommand", "createMisMatchFile");
- exit(1);
- }
-}
-//**********************************************************************************************************************
int SharedCommand::ListGroupSameSeqs(vector<string>& groupMapsSeqs, SharedListVector* SharedList) {
try {
int error = 0;
private:
void printSharedData(vector<SharedRAbundVector*>, ofstream&);
- int createMisMatchFile(SharedListVector*, GroupMap*);
int readOrderFile();
bool isValidGroup(string, vector<string>);
int eliminateZeroOTUS(vector<SharedRAbundVector*>&);
int createSharedFromListGroup(string);
int createSharedFromBiom(string);
string getTag(string&);
- vector<string> readRows(string, ifstream&, int&);
+ vector<string> readRows(string, int&);
int getDims(string, int&, int&);
- vector<SharedRAbundVector*> readData(string, string, ifstream&, vector<string>&, int);
+ vector<SharedRAbundVector*> readData(string, string, string, vector<string>&, int);
vector<string> Groups, outputNames, order;
set<string> labels;
- string fileroot, outputDir, listfile, groupfile, biomfile, ordergroupfile;
+ string fileroot, outputDir, listfile, groupfile, biomfile, ordergroupfile, countfile;
bool firsttime, pickedGroups, abort, allLines;
map<string, ofstream*> filehandles;
map<string, ofstream*>::iterator it3;
/***********************************************************************/
-SharedListVector::SharedListVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0){ groupmap = NULL; }
+SharedListVector::SharedListVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0){ groupmap = NULL; countTable = NULL; }
/***********************************************************************/
-SharedListVector::SharedListVector(int n): DataVector(), data(n, "") , maxRank(0), numBins(0), numSeqs(0){ groupmap = NULL; }
+SharedListVector::SharedListVector(int n): DataVector(), data(n, "") , maxRank(0), numBins(0), numSeqs(0){ groupmap = NULL; countTable = NULL; }
/***********************************************************************/
SharedListVector::SharedListVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) {
try {
//set up groupmap for later.
- groupmap = new GroupMap(m->getGroupFile());
- groupmap->readMap();
+ if (m->groupMode == "group") {
+ groupmap = new GroupMap(m->getGroupFile());
+ groupmap->readMap();
+ }else {
+ countTable = new CountTable();
+ countTable->readTable(m->getCountTableFile());
+ }
int hold;
string inputData;
/***********************************************************************/
SharedOrderVector* SharedListVector::getSharedOrderVector(){
try {
- string groupName, names, name;
-
SharedOrderVector* order = new SharedOrderVector();
order->setLabel(label);
for(int i=0;i<numBins;i++){
int binSize = m->getNumNames(get(i)); //find number of individual in given bin
- names = get(i);
- while (names.find_first_of(',') != -1) {
- name = names.substr(0,names.find_first_of(','));
- names = names.substr(names.find_first_of(',')+1, names.length());
- groupName = groupmap->getGroup(name);
-
- if(groupName == "not found") { m->mothurOut("Error: Sequence '" + name + "' was not found in the group file, please correct."); m->mothurOutEndLine(); exit(1); }
+ string names = get(i);
+ vector<string> binNames;
+ m->splitAtComma(names, binNames);
+ if (m->groupMode != "group") {
+ binSize = 0;
+ for (int j = 0; j < binNames.size(); j++) { binSize += countTable->getNumSeqs(binNames[i]); }
+ }
+ for (int j = 0; j < binNames.size(); j++) {
+ if (m->control_pressed) { return order; }
+ if (m->groupMode == "group") {
+ string groupName = groupmap->getGroup(binNames[i]);
+ if(groupName == "not found") { m->mothurOut("Error: Sequence '" + binNames[i] + "' was not found in the group file, please correct."); m->mothurOutEndLine(); exit(1); }
- order->push_back(i, binSize, groupName); //i represents what bin you are in
+ order->push_back(i, binSize, groupName); //i represents what bin you are in
+ }else {
+ vector<int> groupAbundances = countTable->getGroupCounts(binNames[i]);
+ vector<string> groupNames = countTable->getNamesOfGroups();
+ for (int k = 0; k < groupAbundances.size(); k++) { //groupAbundances.size() == 0 if there is a file mismatch and m->control_pressed is true.
+ if (m->control_pressed) { return order; }
+ for (int l = 0; l < groupAbundances[k]; l++) { order->push_back(i, binSize, groupNames[k]); }
+ }
+ }
}
- //get last name
- groupName = groupmap->getGroup(names);
- if(groupName == "not found") { m->mothurOut("Error: Sequence '" + names + "' was not found in the group file, please correct."); m->mothurOutEndLine(); exit(1); }
- order->push_back(i, binSize, groupName);
}
random_shuffle(order->begin(), order->end());
SharedRAbundVector SharedListVector::getSharedRAbundVector(string groupName) {
try {
SharedRAbundVector rav(data.size());
- string group, names, name;
for(int i=0;i<numBins;i++){
- names = get(i);
- while (names.find_first_of(',') != -1) {
- name = names.substr(0,names.find_first_of(','));
- names = names.substr(names.find_first_of(',')+1, names.length());
- group = groupmap->getGroup(name);
- if(group == "not found") { m->mothurOut("Error: Sequence '" + name + "' was not found in the group file, please correct."); m->mothurOutEndLine(); exit(1); }
- if (group == groupName) { //this name is in the group you want the vector for.
- rav.set(i, rav.getAbundance(i) + 1, group); //i represents what bin you are in
- }
- }
-
- //get last name
- groupName = groupmap->getGroup(names);
- if(groupName == "not found") { m->mothurOut("Error: Sequence '" + names + "' was not found in the group file, please correct."); m->mothurOutEndLine(); exit(1); }
- if (group == groupName) { //this name is in the group you want the vector for.
- rav.set(i, rav.getAbundance(i) + 1, group); //i represents what bin you are in
+ string names = get(i);
+ vector<string> binNames;
+ m->splitAtComma(names, binNames);
+ for (int j = 0; j < binNames.size(); j++) {
+ if (m->control_pressed) { return rav; }
+ if (m->groupMode == "group") {
+ string group = groupmap->getGroup(binNames[j]);
+ if(group == "not found") { m->mothurOut("Error: Sequence '" + binNames[j] + "' was not found in the group file, please correct."); m->mothurOutEndLine(); exit(1); }
+ if (group == groupName) { //this name is in the group you want the vector for.
+ rav.set(i, rav.getAbundance(i) + 1, group); //i represents what bin you are in
+ }
+ }else {
+ int count = countTable->getGroupCount(binNames[j], groupName);
+ rav.set(i, rav.getAbundance(i) + count, groupName);
+ }
}
}
SharedUtil* util;
util = new SharedUtil();
vector<SharedRAbundVector*> lookup; //contains just the groups the user selected
+ vector<SharedRAbundVector*> lookupDelete;
map<string, SharedRAbundVector*> finder; //contains all groups in groupmap
- string group, names, name;
vector<string> Groups = m->getGroups();
- vector<string> allGroups = groupmap->getNamesOfGroups();
+ vector<string> allGroups;
+ if (m->groupMode == "group") { allGroups = groupmap->getNamesOfGroups(); }
+ else { allGroups = countTable->getNamesOfGroups(); }
util->setGroups(Groups, allGroups);
m->setGroups(Groups);
delete util;
finder[allGroups[i]]->setGroup(allGroups[i]);
if (m->inUsersGroups(allGroups[i], m->getGroups())) { //if this group is in user groups
lookup.push_back(finder[allGroups[i]]);
- }
+ }else {
+ lookupDelete.push_back(finder[allGroups[i]]);
+ }
}
//fill vectors
for(int i=0;i<numBins;i++){
- names = get(i);
- int nameLength = names.size();
- string seqName = "";
-
- for(int j=0;j<nameLength;j++){
- if(names[j] == ','){
- group = groupmap->getGroup(seqName);
- if(group == "not found") { m->mothurOut("Error: Sequence '" + seqName + "' was not found in the group file, please correct."); m->mothurOutEndLine(); exit(1); }
- finder[group]->set(i, finder[group]->getAbundance(i) + 1, group); //i represents what bin you are in
-
- seqName = "";
- }
- else{
- seqName += names[j];
- }
+ string names = get(i);
+ vector<string> binNames;
+ m->splitAtComma(names, binNames);
+ for (int j = 0; j < binNames.size(); j++) {
+ if (m->groupMode == "group") {
+ string group = groupmap->getGroup(binNames[j]);
+ if(group == "not found") { m->mothurOut("Error: Sequence '" + binNames[j] + "' was not found in the group file, please correct."); m->mothurOutEndLine(); exit(1); }
+ finder[group]->set(i, finder[group]->getAbundance(i) + 1, group); //i represents what bin you are in
+ }else{
+ vector<int> counts = countTable->getGroupCounts(binNames[j]);
+ for (int k = 0; k < allGroups.size(); k++) {
+ finder[allGroups[k]]->set(i, finder[allGroups[k]]->getAbundance(i) + counts[k], allGroups[k]);
+ }
+ }
}
- group = groupmap->getGroup(seqName);
- if(group == "not found") { m->mothurOut("Error: Sequence '" + seqName + "' was not found in the group file, please correct."); m->mothurOutEndLine(); exit(1); }
- finder[group]->set(i, finder[group]->getAbundance(i) + 1, group); //i represents what bin you are in
-
-
-
-// while (names.find_first_of(',') != -1) {
-// name = names.substr(0,names.find_first_of(','));
-// names = names.substr(names.find_first_of(',')+1, names.length());
-// group = groupmap->getGroup(name);
-// if(group == "not found") { m->mothurOut("Error: Sequence '" + name + "' was not found in the group file, please correct."); m->mothurOutEndLine(); exit(1); }
-// finder[group]->set(i, finder[group]->getAbundance(i) + 1, group); //i represents what bin you are in
-// }
-
- //get last name
-// group = groupmap->getGroup(names);
-// if(group == "not found") { m->mothurOut("Error: Sequence '" + names + "' was not found in the group file, please correct."); m->mothurOutEndLine(); exit(1); }
-// finder[group]->set(i, finder[group]->getAbundance(i) + 1, group); //i represents what bin you are in
-
}
+
+ for (int j = 0; j < lookupDelete.size(); j++) { delete lookupDelete[j]; }
return lookup;
}
OrderVector ov;
for(int i=0;i<data.size();i++){
- int binSize = m->getNumNames(data[i]);
+ string names = data[i];
+ vector<string> binNames;
+ m->splitAtComma(names, binNames);
+ int binSize = binNames.size();
+ if (m->groupMode != "group") {
+ binSize = 0;
+ for (int j = 0; j < binNames.size(); j++) { binSize += countTable->getNumSeqs(binNames[i]); }
+ }
for(int j=0;j<binSize;j++){
ov.push_back(i);
}
for(int i=0;i<data.size();i++){
string listOTU = data[i];
- int length = listOTU.size();
-
- string seqName="";
-
- for(int j=0;j<length;j++){
-
- if(listOTU[j] != ','){
- seqName += listOTU[j];
- }
- else{
- if(orderMap->count(seqName) == 0){
- m->mothurOut(seqName + " not found, check *.names file\n");
- exit(1);
- }
-
- ov.set((*orderMap)[seqName], i);
- seqName = "";
- }
- }
-
- if(orderMap->count(seqName) == 0){
- m->mothurOut(seqName + " not found, check *.names file\n");
- exit(1);
+ vector<string> binNames;
+ m->splitAtComma(listOTU, binNames);
+ for (int j = 0; j < binNames.size(); j++) {
+ if(orderMap->count(binNames[j]) == 0){
+ m->mothurOut(binNames[j] + " not found, check *.names file\n");
+ exit(1);
+ }
+ ov.set((*orderMap)[binNames[j]], i);
}
- ov.set((*orderMap)[seqName], i);
}
ov.setLabel(label);
#include "datavector.hpp"
#include "groupmap.h"
+#include "counttable.h"
#include "sharedrabundvector.h"
#include "sharedsabundvector.h"
SharedListVector();
SharedListVector(int);
SharedListVector(ifstream&);
- SharedListVector(const SharedListVector& lv) : DataVector(lv.label), data(lv.data), maxRank(lv.maxRank), numBins(lv.numBins), numSeqs(lv.numSeqs){ groupmap = NULL; };
- ~SharedListVector(){ if (groupmap != NULL) { delete groupmap; } };
+ SharedListVector(const SharedListVector& lv) : DataVector(lv.label), data(lv.data), maxRank(lv.maxRank), numBins(lv.numBins), numSeqs(lv.numSeqs){ groupmap = NULL; countTable = NULL; };
+ ~SharedListVector(){ if (groupmap != NULL) { delete groupmap; } if (countTable != NULL) { delete countTable; } };
int getNumBins() { return numBins; }
int getNumSeqs() { return numSeqs; }
private:
vector<string> data; //data[i] is a list of names of sequences in the ith OTU.
GroupMap* groupmap;
+ CountTable* countTable;
int maxRank;
int numBins;
int numSeqs;
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); }
- string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName));
- string groupFileName = fileRoot + getOutputFileNameTag("group");
+ string fileRoot = m->getRootName(m->getSimpleName(flowFileName));
+ int pos = fileRoot.find_first_of('.');
+ string fileGroup = fileRoot;
+ if (pos != string::npos) { fileGroup = fileRoot.substr(pos+1, (fileRoot.length()-1-(pos+1))); }
+ string groupFileName = thisOutputDir + fileRoot + getOutputFileNameTag("group");
ofstream groupFile;
m->openOutputFile(groupFileName, groupFile);
for(int i=0;i<numSeqs;i++){
if (m->control_pressed) { break; }
- groupFile << seqNameVector[i] << '\t' << fileRoot << endl;
+ groupFile << seqNameVector[i] << '\t' << fileGroup << endl;
}
groupFile.close();
outputNames.push_back(groupFileName);
string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("fasta");
string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("name");
string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("counts");
- string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName));
- string groupFileName = fileRoot + getOutputFileNameTag("group");
+ string fileRoot = m->getRootName(m->getSimpleName(flowFileName));
+ int pos = fileRoot.find_first_of('.');
+ string fileGroup = fileRoot;
+ if (pos != string::npos) { fileGroup = fileRoot.substr(pos+1, (fileRoot.length()-1-(pos+1))); }
+ string groupFileName = thisOutputDir + fileRoot + getOutputFileNameTag("group");
writeQualities(numOTUs, numFlowCells, qualityFileName, otuCounts, nSeqsPerOTU, seqNumber, singleTau, flowDataIntI, uniqueFlowgrams, cumNumSeqs, mapUniqueToSeq, seqNameVector, centroids, aaI); if (m->control_pressed) { break; }
writeSequences(thisCompositeFASTAFileName, numOTUs, numFlowCells, fastaFileName, otuCounts, uniqueFlowgrams, seqNameVector, aaI, centroids);if (m->control_pressed) { break; }
writeNames(thisCompositeNamesFileName, numOTUs, nameFileName, otuCounts, seqNameVector, aaI, nSeqsPerOTU); if (m->control_pressed) { break; }
writeClusters(otuCountsFileName, numOTUs, numFlowCells,otuCounts, centroids, uniqueFlowgrams, seqNameVector, aaI, nSeqsPerOTU, lengths, flowDataIntI); if (m->control_pressed) { break; }
- writeGroups(groupFileName, fileRoot, numSeqs, seqNameVector); if (m->control_pressed) { break; }
+ writeGroups(groupFileName, fileGroup, numSeqs, seqNameVector); if (m->control_pressed) { break; }
if (large) {
if (g > 0) {
CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
- CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
+ CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(pdistance);
CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
string temp = validParameter.validFile(parameters, "distance", false);
if (temp == "not found") { phylip = false; outputForm = ""; }
else{
+ if (temp=="phylip") { temp = "lt"; }
if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
}
CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
- CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
+ CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(pdistance);
CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
string temp = validParameter.validFile(parameters, "distance", false);
if (temp == "not found") { phylip = false; outputForm = ""; }
else{
+ if (temp=="phylip") { temp = "lt"; }
if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
}
}else if (Estimators[i] == "chao") {
vennCalculators.push_back(new Chao1());
}else if (Estimators[i] == "ace") {
- if(abund < 5)
- abund = 10;
+ if(abund < 5) { abund = 10; }
vennCalculators.push_back(new Ace(abund));
}
}