CXXFLAGS += -O3
-MOTHUR_FILES = "\"Enter_your_default_path_here\""
+MOTHUR_FILES = "\"../Release\""
ifeq ($(strip $(MOTHUR_FILES)),"\"Enter_your_default_path_here\"")
else
CXXFLAGS += -DMOTHUR_FILES=${MOTHUR_FILES}
}
}
i -= insertLength;
- //if (i < 0) { cout << " we have a negative i = " << i << endl; }
}
else{
}
// i -= insertLength;
+
+ //if i is negative, we want to remove the extra gaps to the right
+ if (i < 0) { cout << "i is negative" << endl; }
}
}
}
#include "phylodiversity.h"
-/**************************************************************************************************/
-EstOutput PhyloDiversity::getValues(Tree* t, vector<int> treeNodes) {
+/**************************************************************************************************
+EstOutput PhyloDiversity::getValues(Tree* t, vector<int> treeNodes, vector< vector<float> >& data) {
try {
map<string, float> DScore;
//initialize Dscore
for (int i=0; i<globaldata->Groups.size(); i++) { DScore[globaldata->Groups[i]] = 0.0; }
- /********************************************************/
+ /********************************************************
//calculate a D value for each group
for(int v=0;v<treeNodes.size();v++){
if (m->control_pressed) { return data; }
- //is this node from a sequence which is in one of the users groups
- if (inUsersGroups(t->tree[treeNodes[v]].getGroup(), globaldata->Groups) == true) {
-
- //calc the branch length
- //while you aren't at root
- float sum = 0.0;
- int index = treeNodes[v];
+ //calc the branch length
+ //while you aren't at root
+ float sum = 0.0;
+ int index = treeNodes[v];
- while(t->tree[index].getParent() != -1){
-
- //if you have a BL
- if(t->tree[index].getBranchLength() != -1){
- sum += abs(t->tree[index].getBranchLength());
- }
- index = t->tree[index].getParent();
- }
-
- //get last breanch length added
+ while(t->tree[index].getParent() != -1){
+
+ //if you have a BL
if(t->tree[index].getBranchLength() != -1){
sum += abs(t->tree[index].getBranchLength());
}
-
- //for each group in the groups update the total branch length accounting for the names file
- vector<string> groups = t->tree[treeNodes[v]].getGroup();
- for (int j = 0; j < groups.size(); j++) {
- int numSeqsInGroupJ = 0;
- map<string, int>::iterator it;
- it = t->tree[treeNodes[v]].pcount.find(groups[j]);
- if (it != t->tree[treeNodes[v]].pcount.end()) { //this leaf node contains seqs from group j
- numSeqsInGroupJ = it->second;
- }
-
- //add branch length to total for group
- DScore[groups[j]] += (numSeqsInGroupJ * sum);
+ index = t->tree[index].getParent();
+ }
+
+ //get last breanch length added
+ if(t->tree[index].getBranchLength() != -1){
+ sum += abs(t->tree[index].getBranchLength());
+ }
+
+ //for each group in the groups update the total branch length accounting for the names file
+ vector<string> groups = t->tree[treeNodes[v]].getGroup();
+ for (int j = 0; j < groups.size(); j++) {
+ int numSeqsInGroupJ = 0;
+ map<string, int>::iterator it;
+ it = t->tree[treeNodes[v]].pcount.find(groups[j]);
+ if (it != t->tree[treeNodes[v]].pcount.end()) { //this leaf node contains seqs from group j
+ numSeqsInGroupJ = it->second;
}
+
+ //add branch length to total for group
+ DScore[groups[j]] += (numSeqsInGroupJ * sum);
}
+
}
for (int i=0; i<globaldata->Groups.size(); i++) {
- //if (groupTotals[globaldata->Groups[i]] != 0.0) { //avoid divide by zero error
- //float percent = DScore[globaldata->Groups[i]] / groupTotals[globaldata->Groups[i]];
float percent = DScore[globaldata->Groups[i]];
data.push_back(percent);
- //}else { data.push_back(0.0); }
+
}
return data;
}
}
/**************************************************************************************************/
-void PhyloDiversity::setTotalGroupBranchLengths(Tree* t) {
- try {
-
- groupTotals.clear();
-
- //initialize group totals
- for (int i=0; i<globaldata->Groups.size(); i++) { groupTotals[globaldata->Groups[i]] = 0.0; }
-
-
- /********************************************************/
- //calculate a D value for each group
- for(int v=0;v<t->getNumLeaves();v++){
-
- //is this node from a sequence which is in one of the users groups
- if (inUsersGroups(t->tree[v].getGroup(), globaldata->Groups) == true) {
-
- //calc the branch length
- int index = v;
- float sum = 0.0;
-
- while(t->tree[index].getParent() != -1){ //while you aren't at root
-
- //if you have a BL
- if(t->tree[index].getBranchLength() != -1){
- sum += abs(t->tree[index].getBranchLength());
- }
- index = t->tree[index].getParent();
- }
-
- //get last breanch length added
- if(t->tree[index].getBranchLength() != -1){
- sum += abs(t->tree[index].getBranchLength());
- }
-
- //account for the names file
- vector<string> groups = t->tree[v].getGroup();
- for (int j = 0; j < groups.size(); j++) {
- int numSeqsInGroupJ = 0;
- map<string, int>::iterator it;
- it = t->tree[v].pcount.find(groups[j]);
- if (it != t->tree[v].pcount.end()) { //this leaf node contains seqs from group j
- numSeqsInGroupJ = it->second;
- }
- //add branch length to total for group
- groupTotals[groups[j]] += (numSeqsInGroupJ * sum);
- }//end for
- }//end if
- }//end for
-
- }
- catch(exception& e) {
- m->errorOut(e, "PhyloDiversity", "setTotalGroupBranchLengths");
- exit(1);
- }
-}
-/**************************************************************************************************/
#include "globaldata.hpp"
#include "mothurout.h"
-typedef vector<double> EstOutput;
/***********************************************************************/
PhyloDiversity(TreeMap* t) : tmap(t) { globaldata = GlobalData::getInstance(); m = MothurOut::getInstance(); }
~PhyloDiversity() {};
- EstOutput getValues(Tree*, vector<int>);
- void setTotalGroupBranchLengths(Tree*);
+ //int getValues(Tree*, vector<int>, vector< vector< float> >&);
+
private:
GlobalData* globaldata;
MothurOut* m;
- EstOutput data;
TreeMap* tmap;
- map<string, float> groupTotals;
};
/***********************************************************************/
*/
#include "phylodiversitycommand.h"
-#include "phylodiversity.h"
//**********************************************************************************************************************
PhyloDiversityCommand::PhyloDiversityCommand(string option) {
splitAtDash(groups, Groups);
globaldata->Groups = Groups;
}
+
}
}
for (int i = 0; i < globaldata->Groups.size(); i++) { if (globaldata->Groups[i] == "xxx") { globaldata->Groups.erase(globaldata->Groups.begin()+i); break; } }
vector<string> outputNames;
-
- //diversity calculator
- PhyloDiversity phylo(globaldata->gTreemap);
-
+
vector<Tree*> trees = globaldata->gTree;
//for each of the users trees
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
- //phylo.setTotalGroupBranchLengths(trees[i]);
-
string outFile = outputDir + getRootName(getSimpleName(globaldata->getTreeFile())) + toString(i+1) + ".phylo.diversity";
if (rarefy) { outFile += ".rarefaction"; }
outputNames.push_back(outFile);
numLeafNodes = randomLeaf.size(); //reset the number of leaf nodes you are using
- //convert freq percentage to number
- int increment = 100;
- if (freq < 1.0) { increment = numLeafNodes * freq; }
- else { increment = freq; }
+ //each group, each sampling, if no rarefy iters = 1;
+ map<string, vector<float> > diversity;
//each group, each sampling, if no rarefy iters = 1;
- vector< vector<float> > diversity;
- diversity.resize(globaldata->Groups.size());
+ map<string, vector<float> > sumDiversity;
+
+ //find largest group total
+ int largestGroup = 0;
+ for (int j = 0; j < globaldata->Groups.size(); j++) {
+ if (globaldata->gTreemap->seqsPerGroup[globaldata->Groups[j]] > largestGroup) { largestGroup = globaldata->gTreemap->seqsPerGroup[globaldata->Groups[j]]; }
+
+ //initialize diversity
+ diversity[globaldata->Groups[j]].resize(globaldata->gTreemap->seqsPerGroup[globaldata->Groups[j]]+1, 0.0); //numSampled
+ //groupA 0.0 0.0
+
+ //initialize sumDiversity
+ sumDiversity[globaldata->Groups[j]].resize(globaldata->gTreemap->seqsPerGroup[globaldata->Groups[j]]+1, 0.0);
+ }
+
+ //convert freq percentage to number
+ int increment = 100;
+ if (freq < 1.0) { increment = largestGroup * freq;
+ }else { increment = freq; }
//initialize sampling spots
- vector<int> numSampledList;
- for(int k = 0; k < numLeafNodes; k++){ if((k == 0) || (k+1) % increment == 0){ numSampledList.push_back(k); } }
- if(numLeafNodes % increment != 0){ numSampledList.push_back(numLeafNodes); }
-
- //initialize diversity
- for (int j = 0; j < diversity.size(); j++) { diversity[j].resize(numSampledList.size(), 0.0); } // 10sampled 20 sampled ...
- //groupA 0.0 0.0
- //then for each iter you add to score and then when printing divide by iters to get average
+ set<int> numSampledList;
+ for(int k = 1; k <= largestGroup; k++){ if((k == 1) || (k % increment == 0)){ numSampledList.insert(k); } }
+ if(largestGroup % increment != 0){ numSampledList.insert(largestGroup); }
+
+ //add other groups ending points
+ for (int j = 0; j < globaldata->Groups.size(); j++) {
+ if (numSampledList.count(diversity[globaldata->Groups[j]].size()-1) == 0) { numSampledList.insert(diversity[globaldata->Groups[j]].size()-1); }
+ }
+
for (int l = 0; l < iters; l++) {
random_shuffle(randomLeaf.begin(), randomLeaf.end());
- vector<int> leavesSampled;
- EstOutput data;
- int count = 0;
+ //initialize counts
+ map<string, int> counts;
+ for (int j = 0; j < globaldata->Groups.size(); j++) { counts[globaldata->Groups[j]] = 0; }
+
for(int k = 0; k < numLeafNodes; k++){
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
- leavesSampled.push_back(randomLeaf[k]);
-
- if((k == 0) || (k+1) % increment == 0){ //ready to calc?
-
- data = phylo.getValues(trees[i], leavesSampled);
-
- //datas results are in the same order as globaldatas groups
- for (int h = 0; h < data.size(); h++) { diversity[h][count] += data[h]; }
-
- count++;
+ //calc branch length of randomLeaf k
+ float br = calcBranchLength(trees[i], randomLeaf[k]);
+
+ //for each group in the groups update the total branch length accounting for the names file
+ vector<string> groups = trees[i]->tree[randomLeaf[k]].getGroup();
+ for (int j = 0; j < groups.size(); j++) {
+ int numSeqsInGroupJ = 0;
+ map<string, int>::iterator it;
+ it = trees[i]->tree[randomLeaf[k]].pcount.find(groups[j]);
+ if (it != trees[i]->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j
+ numSeqsInGroupJ = it->second;
+ }
+
+ for (int s = (counts[groups[j]]+1); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
+ diversity[groups[j]][s] = diversity[groups[j]][s-1] + (numSeqsInGroupJ * br);
+ }
+ counts[groups[j]] += numSeqsInGroupJ;
}
}
-
- if(numLeafNodes % increment != 0){
-
- data = phylo.getValues(trees[i], leavesSampled);
-
- //datas results are in the same order as globaldatas groups
- for (int h = 0; h < data.size(); h++) { diversity[h][count] += data[h]; }
+
+ if (rarefy) {
+ //add this diversity to the sum
+ for (int j = 0; j < globaldata->Groups.size(); j++) {
+ for (int g = 0; g < diversity[globaldata->Groups[j]].size(); g++) {
+ sumDiversity[globaldata->Groups[j]][g] += diversity[globaldata->Groups[j]][g];
+ }
+ }
}
}
- printData(numSampledList, diversity, outFile);
+ if (rarefy) {
+ printData(numSampledList, sumDiversity, outFile);
+ }else{
+ printData(numSampledList, diversity, outFile);
+ }
}
}
//**********************************************************************************************************************
-void PhyloDiversityCommand::printData(vector<int>& num, vector< vector<float> >& div, string file){
+void PhyloDiversityCommand::printData(set<int>& num, map< string, vector<float> >& div, string file){
try {
ofstream out;
openOutputFile(file, out);
out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
- for (int i = 0; i < num.size(); i++) {
- if (i == (num.size()-1)) { out << num[i] << '\t'; }
- else { out << (num[i]+1) << '\t'; }
+ for (set<int>::iterator it = num.begin(); it != num.end(); it++) {
+ int numSampled = *it;
- for (int j = 0; j < div.size(); j++) {
- float score = div[j][i] / (float)iters;
- out << setprecision(6) << score << '\t';
+ out << numSampled << '\t';
+
+ for (int j = 0; j < globaldata->Groups.size(); j++) {
+ if (numSampled < div[globaldata->Groups[j]].size()) {
+ float score = div[globaldata->Groups[j]][numSampled] / (float)iters;
+ out << setprecision(6) << score << '\t';
+ }else { out << "NA" << '\t'; }
}
out << endl;
}
}
//**********************************************************************************************************************
+float PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf){
+ try {
+
+ //calc the branch length
+ //while you aren't at root
+ float sum = 0.0;
+ int index = leaf;
+
+ while(t->tree[index].getParent() != -1){
+
+ //if you have a BL
+ if(t->tree[index].getBranchLength() != -1){
+ sum += abs(t->tree[index].getBranchLength());
+ }
+ index = t->tree[index].getParent();
+ }
+
+ //get last breanch length added
+ if(t->tree[index].getBranchLength() != -1){
+ sum += abs(t->tree[index].getBranchLength());
+ }
+
+ return sum;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PhyloDiversityCommand", "calcBranchLength");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
\ No newline at end of file
string groups, outputDir;
vector<string> Groups, outputNames; //holds groups to be used, and outputFile names
- void printData(vector<int>&, vector< vector<float> >&, string);
+ void printData(set<int>&, map< string, vector<float> >&, string);
+ float calcBranchLength(Tree*, int);
};
#endif
out << "taxlevel\t rankID\t taxon\t daughterlevels\t total\t";
if (groupmap != NULL) {
//so the labels match the counts below, since the map sorts them automatically...
- sort(groupmap->namesOfGroups.begin(), groupmap->namesOfGroups.end());
+ //sort(groupmap->namesOfGroups.begin(), groupmap->namesOfGroups.end());
for (int i = 0; i < groupmap->namesOfGroups.size(); i++) {
out << groupmap->namesOfGroups[i] << '\t';
map<string, int>::iterator itGroup;
if (groupmap != NULL) {
- for (itGroup = tree[0].groupCount.begin(); itGroup != tree[0].groupCount.end(); itGroup++) {
- out << itGroup->second << '\t';
- }
+ //for (itGroup = tree[0].groupCount.begin(); itGroup != tree[0].groupCount.end(); itGroup++) {
+ // out << itGroup->second << '\t';
+ //}
+ for (int i = 0; i < groupmap->namesOfGroups.size(); i++) { out << tree[0].groupCount[groupmap->namesOfGroups[i]] << '\t'; }
}
out << endl;
map<string, int>::iterator itGroup;
if (groupmap != NULL) {
- for (itGroup = tree[it->second].groupCount.begin(); itGroup != tree[it->second].groupCount.end(); itGroup++) {
- out << itGroup->second << '\t';
- }
+ //for (itGroup = tree[it->second].groupCount.begin(); itGroup != tree[it->second].groupCount.end(); itGroup++) {
+ // out << itGroup->second << '\t';
+ //}
+ for (int i = 0; i < groupmap->namesOfGroups.size(); i++) { out << tree[it->second].groupCount[groupmap->namesOfGroups[i]] << '\t'; }
}
out << endl;
}