}
//if the user changes the output directory command factory will send this info to us in the output parameter
- outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = hasPath(globaldata->getTreeFile()); }
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(globaldata->getTreeFile()); }
if (globaldata->gTree.size() == 0) {//no trees were read
m->mothurOut("You must execute the read.tree command, before you may execute the phylo.diversity command."); m->mothurOutEndLine(); abort = true; }
convert(temp, iters);
temp = validParameter.validFile(parameters, "rarefy", false); if (temp == "not found") { temp = "F"; }
- rarefy = isTrue(temp);
+ rarefy = m->isTrue(temp);
if (!rarefy) { iters = 1; }
temp = validParameter.validFile(parameters, "summary", false); if (temp == "not found") { temp = "T"; }
- summary = isTrue(temp);
+ summary = m->isTrue(temp);
temp = validParameter.validFile(parameters, "scale", false); if (temp == "not found") { temp = "F"; }
- scale = isTrue(temp);
+ scale = m->isTrue(temp);
temp = validParameter.validFile(parameters, "collect", false); if (temp == "not found") { temp = "F"; }
- collect = isTrue(temp);
+ collect = m->isTrue(temp);
groups = validParameter.validFile(parameters, "groups", false);
if (groups == "not found") { groups = ""; Groups = globaldata->gTreemap->namesOfGroups; globaldata->Groups = Groups; }
else {
- splitAtDash(groups, Groups);
+ m->splitAtDash(groups, Groups);
globaldata->Groups = Groups;
}
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
ofstream outSum, outRare, outCollect;
- string outSumFile = outputDir + getRootName(getSimpleName(globaldata->getTreeFile())) + toString(i+1) + ".phylodiv.summary";
- string outRareFile = outputDir + getRootName(getSimpleName(globaldata->getTreeFile())) + toString(i+1) + ".phylodiv.rarefaction";
- string outCollectFile = outputDir + getRootName(getSimpleName(globaldata->getTreeFile())) + toString(i+1) + ".phylodiv";
+ string outSumFile = outputDir + m->getRootName(m->getSimpleName(globaldata->getTreeFile())) + toString(i+1) + ".phylodiv.summary";
+ string outRareFile = outputDir + m->getRootName(m->getSimpleName(globaldata->getTreeFile())) + toString(i+1) + ".phylodiv.rarefaction";
+ string outCollectFile = outputDir + m->getRootName(m->getSimpleName(globaldata->getTreeFile())) + toString(i+1) + ".phylodiv";
- if (summary) { openOutputFile(outSumFile, outSum); outputNames.push_back(outSumFile); }
- if (rarefy) { openOutputFile(outRareFile, outRare); outputNames.push_back(outRareFile); }
- if (collect) { openOutputFile(outCollectFile, outCollect); outputNames.push_back(outCollectFile); }
+ if (summary) { m->openOutputFile(outSumFile, outSum); outputNames.push_back(outSumFile); }
+ if (rarefy) { m->openOutputFile(outRareFile, outRare); outputNames.push_back(outRareFile); }
+ if (collect) { m->openOutputFile(outCollectFile, outCollect); outputNames.push_back(outCollectFile); }
int numLeafNodes = trees[i]->getNumLeaves();
//create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator
vector<int> randomLeaf;
for (int j = 0; j < numLeafNodes; j++) {
- if (inUsersGroups(trees[i]->tree[j].getGroup(), globaldata->Groups) == true) { //is this a node from the group the user selected.
+ if (m->inUsersGroups(trees[i]->tree[j].getGroup(), globaldata->Groups) == true) { //is this a node from the group the user selected.
randomLeaf.push_back(j);
}
}
//initialize counts
map<string, int> counts;
- for (int j = 0; j < globaldata->Groups.size(); j++) { counts[globaldata->Groups[j]] = 0; }
+ map< string, set<int> > countedBranch;
+ for (int j = 0; j < globaldata->Groups.size(); j++) { counts[globaldata->Groups[j]] = 0; countedBranch[globaldata->Groups[j]].insert(-2); } //add dummy index to initialize countedBranch sets
for(int k = 0; k < numLeafNodes; k++){
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
//calc branch length of randomLeaf k
- float br = calcBranchLength(trees[i], randomLeaf[k]);
+ float br = calcBranchLength(trees[i], randomLeaf[k], countedBranch);
//for each group in the groups update the total branch length accounting for the names file
vector<string> groups = trees[i]->tree[randomLeaf[k]].getGroup();
+
for (int j = 0; j < groups.size(); j++) {
int numSeqsInGroupJ = 0;
map<string, int>::iterator it;
if (it != trees[i]->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j
numSeqsInGroupJ = it->second;
}
-
- for (int s = (counts[groups[j]]+1); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
- diversity[groups[j]][s] = diversity[groups[j]][s-1] + ((float) numSeqsInGroupJ * br);
+
+ if (numSeqsInGroupJ != 0) { diversity[groups[j]][(counts[groups[j]]+1)] = diversity[groups[j]][counts[groups[j]]] + br; }
+
+ for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
+ diversity[groups[j]][s] = diversity[groups[j]][s-1]; //update counts, but don't add in redundant branch lengths
}
counts[groups[j]] += numSeqsInGroupJ;
}
void PhyloDiversityCommand::printSumData(map< string, vector<float> >& div, ofstream& out, int numIters){
try {
- out << "numSampled\t";
- for (int i = 0; i < globaldata->Groups.size(); i++) { out << globaldata->Groups[i] << '\t'; }
- out << endl;
+ out << "Groups\tnumSampled\tphyloDiversity" << endl;
out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
-
- set<int> num;
- //find end points to output
- for (map<string, vector<float> >::iterator itEnds = div.begin(); itEnds != div.end(); itEnds++) { num.insert(itEnds->second.size()-1); }
-
- for (set<int>::iterator it = num.begin(); it != num.end(); it++) {
- int numSampled = *it;
-
- out << numSampled << '\t';
- for (int j = 0; j < globaldata->Groups.size(); j++) {
- if (numSampled < div[globaldata->Groups[j]].size()) {
- float score;
- if (scale) { score = (div[globaldata->Groups[j]][numSampled] / (float)numIters) / (float)numSampled; }
- else { score = div[globaldata->Groups[j]][numSampled] / (float)numIters; }
-
- out << setprecision(4) << score << '\t';
- }else { out << "NA" << '\t'; }
- }
- out << endl;
- }
+ for (int j = 0; j < globaldata->Groups.size(); j++) {
+ int numSampled = (div[globaldata->Groups[j]].size()-1);
+ out << globaldata->Groups[j] << '\t' << numSampled << '\t';
+
+ float score;
+ if (scale) { score = (div[globaldata->Groups[j]][numSampled] / (float)numIters) / (float)numSampled; }
+ else { score = div[globaldata->Groups[j]][numSampled] / (float)numIters; }
+
+ out << setprecision(4) << score << endl;
+ }
+
out.close();
}
}
}
//**********************************************************************************************************************
-float PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf){
+float PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< string, set<int> >& counted){
try {
//calc the branch length
//while you aren't at root
float sum = 0.0;
int index = leaf;
-
+
+ vector<string> groups = t->tree[leaf].getGroup();
+
while(t->tree[index].getParent() != -1){
//if you have a BL
if(t->tree[index].getBranchLength() != -1){
- sum += abs(t->tree[index].getBranchLength());
+ if (counted[groups[0]].count(index) == 0) { //you have not already counted this branch
+ sum += abs(t->tree[index].getBranchLength());
+ for (int j = 0; j < groups.size(); j++) { counted[groups[j]].insert(index); }
+ }
}
index = t->tree[index].getParent();
}
//get last breanch length added
if(t->tree[index].getBranchLength() != -1){
- sum += abs(t->tree[index].getBranchLength());
+ if (counted[groups[0]].count(index) == 0) { //you have not already counted this branch
+ sum += abs(t->tree[index].getBranchLength());
+ for (int j = 0; j < groups.size(); j++) { counted[groups[j]].insert(index); }
+ }
}
return sum;