if (parameter == "name" ) { namefile = value; }
if (parameter == "order" ) { orderfile = value; }
if (parameter == "fasta" ) { fastafile = value; }
- if (parameter == "treefile" ) { treefile = value; }
+ if (parameter == "tree" ) { treefile = value; }
if (parameter == "group" ) { groupfile = value; }
if (parameter == "cutoff" ) { cutoff = value; }
if (parameter == "precision" ) { precision = value; }
if (parameter == "order" ) { orderfile = value; }
if (parameter == "group" ) { groupfile = value; }
if (parameter == "fasta" ) { fastafile = value; }
- if (parameter == "treefile" ) { treefile = value; }
+ if (parameter == "tree" ) { treefile = value; }
if (parameter == "cutoff" ) { cutoff = value; }
if (parameter == "precision" ) { precision = value; }
if (parameter == "iters" ) { iters = value; }
}
}
+ if ((commandName == "unifrac.weighted") || (commandName == "unifrac.unweighted")) {
+ if (globaldata->gTree.size() == 0) {//no trees were read
+ cout << "You must execute the read.tree command, before you may execute the unifrac.weighted or unifrac.unweighted command." << endl; return false; }
+ }
+
//check for valid method
if (commandName == "cluster") {
if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
if (key == "rabund" ) { rabundfile = value; inputFileName = value; fileroot = value; format = "rabund"; }
if (key == "sabund" ) { sabundfile = value; inputFileName = value; fileroot = value; format = "sabund"; }
if (key == "fasta" ) { fastafile = value; inputFileName = value; fileroot = value; format = "fasta"; }
- if (key == "treefile" ) { treefile = value; inputFileName = value; fileroot = value; format = "tree"; }
+ if (key == "tree" ) { treefile = value; inputFileName = value; fileroot = value; format = "tree"; }
if (key == "name" ) { namefile = value; }
if (key == "order" ) { orderfile = value; }
if (key == "group" ) { groupfile = value; }
splitAtDash(value, lines);
allLines = 0;
}
- if (key == "label") {//stores lines to be used in a set
+ if (key == "label") {//stores labels to be used in a set
labels.clear();
label = value;
line = "";
splitAtDash(value, labels);
allLines = 0;
}
- if (key == "groups") {//stores lines to be used in a vector
+ if (key == "groups") {//stores groups to be used in a vector
Groups.clear();
groups = value;
splitAtDash(value, Groups);
if (key == "rabund" ) { rabundfile = value; inputFileName = value; fileroot = value; format = "rabund"; }
if (key == "sabund" ) { sabundfile = value; inputFileName = value; fileroot = value; format = "sabund"; }
if (key == "fasta" ) { fastafile = value; inputFileName = value; fileroot = value; format = "fasta"; }
- if (key == "treefile" ) { treefile = value; inputFileName = value; fileroot = value; format = "tree"; }
+ if (key == "tree" ) { treefile = value; inputFileName = value; fileroot = value; format = "tree"; }
if (key == "name" ) { namefile = value; }
if (key == "order" ) { orderfile = value; }
if (key == "group" ) { groupfile = value; }
if (key == "method" ) { method = value; }
if (key == "fileroot" ) { fileroot = value; }
if (key == "randomtree" ) { randomtree = value; }
+ if (key == "groups" ) { groups = value; }
+
if (key == "single") {//stores estimators in a vector
singleEstimators.clear(); //clears out old values
splitAtDash(value, labels);
allLines = 0;
}
+ if (key == "groups") {//stores groups to be used in a vector
+ Groups.clear();
+ groups = value;
+ splitAtDash(value, Groups);
+ }
+
}
//set format for shared
iters = "1000";
line = "";
label = "";
+ groups = "";
jumble = "1"; //0 means don't jumble, 1 means jumble.
randomtree = "0"; //0 means user will enter some user trees, 1 means they just want the random tree distribution.
freq = "100";
cout << "The list parameter and group paramaters are required. When using the command the second way read.otu command parses the .list file" << "\n";
cout << "and separates it into groups. It outputs a .shared file containing the OTU information for each group. The read.otu command also outputs a .list file for each group. " << "\n";
cout << "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile)." << "\n" << "\n";
+ }else if (globaldata->helpRequest == "read.tree") {
+ cout << "The read.tree command must be run before you execute a unifrac.weighted, unifrac.unweighted. " << "\n";
+ cout << "It also must be run before using the parsimony command, unless you are using the randomtree parameter." << "\n";
+ cout << "The read.tree command should be in the following format: read.tree(tree=yourTreeFile, group=yourGroupFile)." << "\n";
+ cout << "The tree and group parameters are both required." << "\n";
+ cout << "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile)." << "\n" << "\n";
}else if (globaldata->helpRequest == "cluster") {
cout << "The cluster command can only be executed after a successful read.dist command." << "\n";
cout << "The cluster command parameter options are method, cuttoff and precision. No parameters are required." << "\n";
cout << "The default value for jumble is 1 (meaning jumble, if it’s set to 0 then it will not jumble) and sharedsummary is sharedsobs-sharedChao-sharedAce-sharedJabund-sharedSorensonAbund-sharedJclass-sharedSorClass-sharedJest-sharedSorEst-SharedThetaYC-SharedThetaN" << "\n";
cout << "The label and line parameters are used to analyze specific lines in your input." << "\n";
cout << "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile)." << "\n" << "\n";
+ }else if (globaldata->helpRequest == "parsimony") {
+ cout << "The parsimony command can only be executed after a successful read.tree command, unless you use the randomtree parameter." << "\n";
+ cout << "The parsimony command parameters are randomtree and iters. No parameters are required." << "\n";
+ cout << "The parsimony command should be in the following format: parsimony(randomtree=yourRandomTreeValue, iters=yourIters)." << "\n";
+ cout << "Example parsimony(randomtree=1, iters=500)." << "\n";
+ cout << "The default value for randomTree is 0 (meaning you want to use the trees in your inputfile, randomtree=1 means you just want the random distribution of trees)," << "\n";
+ cout << "and iters is 1000. The parsimony command output three files: .parsimony, .psummary and .pdistrib, their descriptions are in the manual." << "\n";
+ cout << "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile)." << "\n" << "\n";
+ }else if (globaldata->helpRequest == "unifrac.weighted") {
+ cout << "The unifrac.weighted command can only be executed after a successful read.tree command." << "\n";
+ cout << "The unifrac.weighted command parameters are groups and iters. No parameters are required." << "\n";
+ cout << "The groups paramter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups." << "\n";
+ cout << "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree." << "\n";
+ cout << "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters)." << "\n";
+ cout << "Example unifrac.weighted(groups=A-B-C, iters=500)." << "\n";
+ cout << "The default value for groups is all the groups in your groupfile, and iters is 1000." << "\n";
+ cout << "The unifrac.weighted command output three files: .weighted, .wsummary and .wdistrib, their descriptions are in the manual." << "\n";
+ cout << "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile)." << "\n" << "\n";
+ }else if (globaldata->helpRequest == "unifrac.unweighted") {
+ cout << "The unifrac.unweighted command can only be executed after a successful read.tree command." << "\n";
+ cout << "The unifrac.unweighted command parameters are groups and iters. No parameters are required." << "\n";
+ cout << "The groups paramter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group." << "\n";
+ cout << "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree." << "\n";
+ cout << "The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters)." << "\n";
+ cout << "Example unifrac.unweighted(groups=A-B-C, iters=500)." << "\n";
+ cout << "The default value for groups is all the groups in your groupfile, and iters is 1000." << "\n";
+ cout << "The unifrac.unweighted command output three files: .unweighted, .uwsummary and .uwdistrib, their descriptions are in the manual." << "\n";
+ cout << "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile)." << "\n" << "\n";
}else if (globaldata->helpRequest == "quit") {
cout << "The quit command will terminate Dotur and should be in the following format: " << "\n";
cout << "quit()" << "\n" << "\n";
}else if (globaldata->helpRequest == "") {
- cout << "Valid commands are read.dist(), read.otu(), cluster(), deconvolute(), collect.single(), rarefaction.single(), summary.single(), collect.shared(), rarefaction.shared(), summary.shared(), quit(), help()." << "\n";
+ cout << "Valid commands are read.dist(), read.otu(), read.tree(), cluster(), deconvolute(), collect.single(), rarefaction.single(), summary.single(), collect.shared(), rarefaction.shared(), summary.shared(), parsimony(), unifrac.weighted(), unifrac.unweighted(), quit(), help()." << "\n";
cout << "For more information about a specific command type 'help(commandName)' i.e. 'help(read.dist)'" << endl;
}else {
cout << globaldata->helpRequest << " is not a valid command" << endl;
}
- cout << endl << "For further assistance please refer to the Mothur manual, or contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ cout << endl << "For further assistance please refer to the Mothur manual on our wiki at http://schloss.micro.umass.edu/mothur/, or contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
return 0;
}
namesOfGroups.push_back(seqGroup); //new group
}
}
-
+/************************************************************/
+bool TreeMap::isValidGroup(string groupname) {
+ try {
+ for (int i = 0; i < namesOfGroups.size(); i++) {
+ if (groupname == namesOfGroups[i]) { return true; }
+ }
+
+ return false;
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the TreeMap class Function isValidGroup. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+ catch(...) {
+ cout << "An unknown error has occurred in the TreeMap class function isValidGroup. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+}
/***********************************************************************/
void TreeMap::print(ostream& output){
int getNumSeqs();
void setIndex(string, int); //sequencename, index
int getIndex(string); //returns vector index of sequence
+ bool isValidGroup(string); //return true if string is a valid group
string getGroup(string);
vector<string> namesOfGroups;
vector<string> namesOfSeqs;
map<string,int> seqsPerGroup; //groupname, number of seqs in that group.
- map<string, GroupIndex> treemap; //sequence name and groupname
+ map<string, GroupIndex> treemap; //sequence name and <groupname, vector index>
void print(ostream&);
private:
distFile = globaldata->getTreeFile() + ".uwdistrib";
openOutputFile(distFile, outDist);
+ //if the user has not entered specific groups to analyze then do them all
+ if (globaldata->Groups.size() != 0) {
+ //check that groups are valid
+ for (int i = 0; i < globaldata->Groups.size(); i++) {
+ if (tmap->isValidGroup(globaldata->Groups[i]) != true) {
+ cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
+ // erase the invalid group from globaldata->Groups
+ globaldata->Groups.erase (globaldata->Groups.begin()+i);
+ }
+ }
+
+ //if the user only entered invalid groups
+ if (globaldata->Groups.size() == 0) {
+ cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl;
+ }
+ }
+
convert(globaldata->getIters(), iters); //how many random trees to generate
unweighted = new Unweighted(tmap);
outDist << i+1 << '\t' << '\t'<< j+1 << '\t' << '\t' << randomData[0] << endl;
}
+ saveRandomScores(); //save all random scores for unweighted file
+
//find the signifigance of the score
float rcumul = 0.0000;
for (it = rscoreFreq.begin(); it != rscoreFreq.end(); it++) {
//save the signifigance of the users score for printing later
UWScoreSig.push_back(rCumul[userData[0]]);
- saveRandomScores(); //save all random scores for unweighted file
-
+
//clear random data
rscoreFreq.clear(); //you clear this because in the summary file you want the unweighted signifinance to be relative to these 1000 trees.
rCumul.clear();
/***********************************************************/
void UnifracUnweightedCommand::saveRandomScores() {
try {
- //update total map with new random scores
for (it = rscoreFreq.begin(); it != rscoreFreq.end(); it++) {
//does this score already exist in the total map
it2 = totalrscoreFreq.find(it->first);
//if yes then add them
if (it2 != totalrscoreFreq.end()) {
- it2->second += it->second;
+ totalrscoreFreq[it->first] += rscoreFreq[it->first];
}else{ //its a new score
- totalrscoreFreq[it->first] = 1;
+ totalrscoreFreq[it->first] = rscoreFreq[it->first];
}
}
}
openOutputFile(sumFile, outSum);
distFile = globaldata->getTreeFile() + ".wdistrib";
openOutputFile(distFile, outDist);
-
- numGroups = tmap->getNumGroups();
+
+ //if the user has not entered specific groups to analyze then do them all
+ if (globaldata->Groups.size() == 0) {
+ numGroups = tmap->getNumGroups();
+ }else {
+ //check that groups are valid
+ for (int i = 0; i < globaldata->Groups.size(); i++) {
+ if (tmap->isValidGroup(globaldata->Groups[i]) != true) {
+ cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
+ // erase the invalid group from globaldata->Groups
+ globaldata->Groups.erase (globaldata->Groups.begin()+i);
+ }
+ }
+
+ //if the user only entered invalid groups
+ if (globaldata->Groups.size() == 0) {
+ numGroups = tmap->getNumGroups();
+ cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl;
+ }else if (globaldata->Groups.size() == 1) {
+ cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl;
+ numGroups = tmap->getNumGroups();
+ globaldata->Groups.clear();
+ }else { numGroups = globaldata->Groups.size(); }
+ }
//calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
numComp = 0;
numComp += i;
for (int l = n; l < numGroups; l++) {
//set group comparison labels
- groupComb.push_back(tmap->namesOfGroups[i-1]+tmap->namesOfGroups[l]);
+ if (globaldata->Groups.size() != 0) {
+ groupComb.push_back(globaldata->Groups[i-1]+globaldata->Groups[l]);
+ }else {
+ groupComb.push_back(tmap->namesOfGroups[i-1]+tmap->namesOfGroups[l]);
+ }
}
n++;
}
//reset randomTree parameter to 0
globaldata->setRandomTree("0");
+ //clear out users groups
+ globaldata->Groups.clear();
delete randT;
EstOutput Unweighted::getValues(Tree* t) {
try {
-
+ globaldata = GlobalData::getInstance();
+
//clear out old values
data.resize(1,0);
penalty.resize(t->getNumLeaves(), 0);
map<string,double>::iterator pos;
for(pos=unique.begin();pos!=unique.end();pos++){
- if(pos->first!="xxx"){
+ if((pos->first!="xxx") && (inUsersGroups(pos->first))){
UW += unique[pos->first];
}
}
}
-/**************************************************************************************************/
\ No newline at end of file
+/**************************************************************************************************/
+bool Unweighted::inUsersGroups(string groupname) {
+ try {
+ for (int i = 0; i < globaldata->Groups.size(); i++) {
+ if (groupname == globaldata->Groups[i]) { return true; }
+ }
+ return false;
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the Unweighted class Function inUsersGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+ catch(...) {
+ cout << "An unknown error has occurred in the Unweighted class function inUsersGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+}
\ No newline at end of file
EstOutput getValues(Tree*);
private:
+ GlobalData* globaldata;
EstOutput data;
vector<int> penalty;
TreeMap* tmap;
+ bool inUsersGroups(string);
};
if ((commands.find(command)) != (commands.end())) {
return true;
}else{
- cout << command << " is not a valid command in Mothur. Valid commands are read.dist(), read.otu(), cluster(), collect.single(), collect.shared(), rarefaction.single(), rarefaction.shared(), summary.single(), summary.shared(), quit(), help()." << endl;
+ cout << command << " is not a valid command in Mothur. Valid commands are read.dist(), read.otu(), read.tree(), cluster(), deconvolute(), collect.single(), collect.shared(), rarefaction.single(), rarefaction.shared(), summary.single(), summary.shared(), parsimony(), unifrac.weighted(), unifrac.unweighted(), quit(), help()." << endl;
return false;
}
parameters["group"] = "group";
parameters["order"] = "order";
parameters["fasta"] = "fasta";
- parameters["treefile"] = "treefile";
+ parameters["tree"] = "tree";
parameters["fileroot"] = "fileroot";
parameters["cutoff"] = "cutoff";
parameters["method"] = "method";
EstOutput Weighted::getValues(Tree* t) {
try {
-
- int numGroups = tmap->getNumGroups();
+ globaldata = GlobalData::getInstance();
+ int numGroups;
+
+ //if the user has not entered specific groups to analyze then do them all
+ if (globaldata->Groups.size() == 0) {
+ numGroups = tmap->getNumGroups();
+ }else {
+ numGroups = globaldata->Groups.size();
+ }
//calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
int n = 1;
for (int i=1; i<numGroups; i++) {
for (int l = n; l < numGroups; l++) {
//initialize weighted scores
- WScore[tmap->namesOfGroups[i-1]+tmap->namesOfGroups[l]] = 0.0;
+ if (globaldata->Groups.size() == 0) {
+ WScore[tmap->namesOfGroups[i-1]+tmap->namesOfGroups[l]] = 0.0;
+ }else {
+ WScore[globaldata->Groups[i-1]+globaldata->Groups[l]] = 0.0;
+ }
}
}
for (int b=1; b<numGroups; b++) {
for (int l = n; l < numGroups; l++) {
double u;
- //does this node have descendants from group b-1
- it = t->tree[i].pcount.find(tmap->namesOfGroups[b-1]);
- //if it does u = # of its descendants with a certain group / total number in tree with a certain group
- if (it != t->tree[i].pcount.end()) {
- u = (double) t->tree[i].pcount[tmap->namesOfGroups[b-1]] / (double) tmap->seqsPerGroup[tmap->namesOfGroups[b-1]];
- }else { u = 0.00; }
+ //the user has not entered specific groups
+ if (globaldata->Groups.size() == 0) {
+ //does this node have descendants from group b-1
+ it = t->tree[i].pcount.find(tmap->namesOfGroups[b-1]);
+ //if it does u = # of its descendants with a certain group / total number in tree with a certain group
+ if (it != t->tree[i].pcount.end()) {
+ u = (double) t->tree[i].pcount[tmap->namesOfGroups[b-1]] / (double) tmap->seqsPerGroup[tmap->namesOfGroups[b-1]];
+ }else { u = 0.00; }
- //does this node have descendants from group l
- it = t->tree[i].pcount.find(tmap->namesOfGroups[l]);
- //if it does subtract their percentage from u
- if (it != t->tree[i].pcount.end()) {
- u -= (double) t->tree[i].pcount[tmap->namesOfGroups[l]] / (double) tmap->seqsPerGroup[tmap->namesOfGroups[l]];
- }
+ //does this node have descendants from group l
+ it = t->tree[i].pcount.find(tmap->namesOfGroups[l]);
+ //if it does subtract their percentage from u
+ if (it != t->tree[i].pcount.end()) {
+ u -= (double) t->tree[i].pcount[tmap->namesOfGroups[l]] / (double) tmap->seqsPerGroup[tmap->namesOfGroups[l]];
+ }
- u = abs(u) * t->tree[i].getBranchLength();
+ u = abs(u) * t->tree[i].getBranchLength();
- //save groupcombs u value
- WScore[tmap->namesOfGroups[b-1]+tmap->namesOfGroups[l]] += u;
-
+ //save groupcombs u value
+ WScore[tmap->namesOfGroups[b-1]+tmap->namesOfGroups[l]] += u;
+
+ //the user has entered specific groups
+ }else {
+ //does this node have descendants from group b-1
+ it = t->tree[i].pcount.find(globaldata->Groups[b-1]);
+ //if it does u = # of its descendants with a certain group / total number in tree with a certain group
+ if (it != t->tree[i].pcount.end()) {
+ u = (double) t->tree[i].pcount[globaldata->Groups[b-1]] / (double) tmap->seqsPerGroup[globaldata->Groups[b-1]];
+ }else { u = 0.00; }
+
+ //does this node have descendants from group l
+ it = t->tree[i].pcount.find(globaldata->Groups[l]);
+ //if it does subtract their percentage from u
+ if (it != t->tree[i].pcount.end()) {
+ u -= (double) t->tree[i].pcount[globaldata->Groups[l]] / (double) tmap->seqsPerGroup[globaldata->Groups[l]];
+ }
+
+ u = abs(u) * t->tree[i].getBranchLength();
+
+ //save groupcombs u value
+ WScore[globaldata->Groups[b-1]+globaldata->Groups[l]] += u;
+ }
}
n++;
}
n = 1;
for (int i=1; i<numGroups; i++) {
for (int l = n; l < numGroups; l++) {
- UN = (WScore[tmap->namesOfGroups[i-1]+tmap->namesOfGroups[l]] / D);
+ //the user has not entered specific groups
+ if (globaldata->Groups.size() == 0) {
+ UN = (WScore[tmap->namesOfGroups[i-1]+tmap->namesOfGroups[l]] / D);
+ }else {//they have entered specific groups
+ UN = (WScore[globaldata->Groups[i-1]+globaldata->Groups[l]] / D);
+ }
if (isnan(UN) || isinf(UN)) { UN = 0; }
data.push_back(UN);
}
EstOutput getValues(Tree*);
private:
+ GlobalData* globaldata;
EstOutput data;
TreeMap* tmap;
map<string, int>::iterator it;