int main(int argc, char *argv[]){
try {
- // srand(54321);
+ //srand(54321);
srand( (unsigned)time( NULL ) );
Engine* dotur;
Parsimony(TreeMap* t) : tmap(t) {};
~Parsimony() {};
EstOutput getValues(Tree*);
+ EstOutput getValues(Tree*, string, string) { return data; };
private:
GlobalData* globaldata;
//set up the groups the user wants to include
setGroups();
- for(int i=numLeaves-1;i>=0;i--){
- if(tree[i].pGroups.size() == 0){
- continue;
- }
-
- int escape = 1;
+ for(int i = 0; i < numLeaves; i++){
int z;
-
- while(escape == 1){
- //get random index to switch with
- z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
-
- if(tree[z].pGroups.size() != 0){
- escape = 0;
- }
- }
+ //get random index to switch with
+ z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
//you only want to randomize the nodes that are from a group the user wants analyzed, so
//if either of the leaf nodes you are about to switch are not in the users groups then you don't want to switch them.
bool treez, treei;
-
- //leaves have only one group so you can just set it to begin()
- it = tree[z].pGroups.begin();
- treez = inUsersGroups(it->first, globaldata->Groups);
-
- it = tree[i].pGroups.begin();
- treei = inUsersGroups(it->first, globaldata->Groups);
+
+ treez = inUsersGroups(tree[z].getGroup(), globaldata->Groups);
+ treei = inUsersGroups(tree[i].getGroup(), globaldata->Groups);
if ((treez == true) && (treei == true)) {
//switches node i and node z's info.
map<string,int> lib_hold = tree[z].pGroups;
tree[z].pGroups = (tree[i].pGroups);
tree[i].pGroups = (lib_hold);
-
- tree[z].setGroup(tree[z].pGroups.begin()->first);
- tree[i].setGroup(tree[i].pGroups.begin()->first);
-
+
+ string zgroup = tree[z].getGroup();
+ tree[z].setGroup(tree[i].getGroup());
+ tree[i].setGroup(zgroup);
+
+ string zname = tree[z].getName();
+ tree[z].setName(tree[i].getName());
+ tree[i].setName(zname);
+
map<string,int> gcount_hold = tree[z].pcount;
tree[z].pcount = (tree[i].pcount);
tree[i].pcount = (gcount_hold);
}
/**************************************************************************************************/
+void Tree::randomLabels(string groupA, string groupB) {
+ try {
+ for(int i = 0; i < numLeaves; i++) {
+ int z;
+ //get random index to switch with
+ z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
+
+ //you only want to randomize the nodes that are from a group the user wants analyzed, so
+ //if either of the leaf nodes you are about to switch are not in the users groups then you don't want to switch them.
+ if (((tree[z].getGroup() == groupA) || (tree[z].getGroup() == groupB)) && ((tree[i].getGroup() == groupA) || (tree[i].getGroup() == groupB))) {
+ //switches node i and node z's info.
+ map<string,int> lib_hold = tree[z].pGroups;
+ tree[z].pGroups = (tree[i].pGroups);
+ tree[i].pGroups = (lib_hold);
+
+ string zgroup = tree[z].getGroup();
+ tree[z].setGroup(tree[i].getGroup());
+ tree[i].setGroup(zgroup);
+
+ string zname = tree[z].getName();
+ tree[z].setName(tree[i].getName());
+ tree[i].setName(zname);
+
+ map<string,int> gcount_hold = tree[z].pcount;
+ tree[z].pcount = (tree[i].pcount);
+ tree[i].pcount = (gcount_hold);
+ }
+ }
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+ catch(...) {
+ cout << "An unknown error has occurred in the Tree class function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+}
+/**************************************************************************************************/
void Tree::randomBlengths() {
try {
for(int i=numNodes-1;i>=0;i--){
randomLabels();
assembleTree();
}
+/*************************************************************************************************/
+void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
+ randomLabels(groupA, groupB);
+ assembleTree();
+}
/*************************************************************************************************/
//for now it's just random topology but may become random labels as well later that why this is such a simple function now...
/*****************************************************************/
// This prints out the tree in Newick form.
-void Tree::createNewickFile() {
+void Tree::createNewickFile(string f) {
try {
int root = findRoot();
- filename = getRootName(globaldata->getTreeFile()) + "newick";
+ //filename = getRootName(globaldata->getTreeFile()) + "newick";
+ filename = f;
openOutputFile(filename, out);
printBranch(root);
// you are at the end of the tree
out << ";" << endl;
+ out.close();
}
catch(exception& e) {
cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function createNewickFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
printBranch(tree[node].getRChild());
out << ")";
}else { //you are a leaf
- tree[node].printNode(); //prints out name and branch length
+ out << tree[node].getName() << ":" << tree[node].getBranchLength();
}
}
void getCopy(Tree*); //makes tree a copy of the one passed in.
void assembleRandomTree();
void assembleRandomUnifracTree();
- void createNewickFile();
+ void assembleRandomUnifracTree(string, string);
+ void createNewickFile(string);
int getIndex(string);
void setIndex(string, int);
int getNumNodes() { return numNodes; }
void randomTopology();
void randomBlengths();
void randomLabels();
+ void randomLabels(string, string);
int findRoot(); //return index of root node
void printBranch(int); //recursively print out tree
void setGroups();
TreeCalculator(string n) : name(n) {};
~TreeCalculator(){};
virtual EstOutput getValues(Tree*) = 0;
+ virtual EstOutput getValues(Tree*, string, string) = 0;
virtual string getName() { return name; }
for(it=pcount.begin();it!=pcount.end();it++){
cout << ' ' << it->first << ':' << it->second;
}
- cout << endl;
+ cout << endl;
+
}
catch(exception& e) {
//get percentage of random trees with that info
rscoreFreq[it->first] /= iters;
rcumul-= it->second;
-
}
//save the signifigance of the users score for printing later
tmap = globaldata->gTreemap;
weightedFile = globaldata->getTreeFile() + ".weighted";
openOutputFile(weightedFile, out);
+ //column headers
+ out << "Group" << '\t' << "Score" << '\t' << "UserFreq" << '\t' << "UserCumul" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl;
+
sumFile = globaldata->getTreeFile() + ".wsummary";
openOutputFile(sumFile, outSum);
//get weighted for users tree
userData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
- uscoreFreq.resize(numComp);
- validScores.resize(numComp);
- totalrscoreFreq.resize(numComp);
- uCumul.resize(numComp);
-
+
//create new tree with same num nodes and leaves as users
randT = new Tree();
//get pscores for users trees
for (int i = 0; i < T.size(); i++) {
- rscoreFreq.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
- rCumul.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
-
+ rScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
+ uScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
+ validScores.resize(numComp);
+
cout << "Processing tree " << i+1 << endl;
userData = weighted->getValues(T[i]); //userData[0] = weightedscore
//save users score
for (int s=0; s<numComp; s++) {
- //update uscoreFreq
- it = uscoreFreq[s].find(userData[s]);
- if (it == uscoreFreq[s].end()) {//new score
- uscoreFreq[s][userData[s]] = 1;
- }else{ uscoreFreq[s][userData[s]]++; }
+ //add users score to vector of user scores
+ uScores[s].push_back(userData[s]);
- //add user score to valid scores
- validScores[s][userData[s]] = userData[s];
+ //add users score to vector of valid scores
+ validScores[s].push_back(userData[s]);
//save users tree score for summary file
utreeScores.push_back(userData[s]);
}
- //copy T[i]'s info.
- randT->getCopy(T[i]);
-
//get scores for random trees
for (int j = 0; j < iters; j++) {
- //create a random tree with same topology as T[i], but different labels
- randT->assembleRandomUnifracTree();
- //get pscore of random tree
- randomData = weighted->getValues(randT);
-
- //save ramdoms score
- for (int p=0; p<numComp; p++) {
- //add trees weighted score random score freq
- it2 = rscoreFreq[p].find(randomData[p]);
- if (it2 != rscoreFreq[p].end()) {//already have that score
- rscoreFreq[p][randomData[p]]++;
- }else{//first time we have seen this score
- rscoreFreq[p][randomData[p]] = 1;
+ int n = 1;
+ int count = 0;
+ for (int r=0; r<numGroups; r++) {
+ for (int l = n; l < numGroups; l++) {
+ //copy T[i]'s info.
+ randT->getCopy(T[i]);
+
+ if (globaldata->Groups.size() != 0) {
+ //create a random tree with same topology as T[i], but different labels
+ randT->assembleRandomUnifracTree(globaldata->Groups[r], globaldata->Groups[l]);
+ //get wscore of random tree
+ randomData = weighted->getValues(randT, globaldata->Groups[r], globaldata->Groups[l]);
+ }else {
+ //create a random tree with same topology as T[i], but different labels
+ randT->assembleRandomUnifracTree(tmap->namesOfGroups[r], tmap->namesOfGroups[l]);
+ //get wscore of random tree
+ randomData = weighted->getValues(randT, tmap->namesOfGroups[r], tmap->namesOfGroups[l]);
+ }
+
+ //save scores
+ rScores[count].push_back(randomData[0]);
+ validScores[count][randomData[0]] = randomData[0];
+ count++;
}
-
- //add random score to valid scores
- validScores[p][randomData[p]] = randomData[p];
+ n++;
}
}
-
- saveRandomScores(); //save all random scores for weighted file
+
+ removeValidScoresDuplicates();
//find the signifigance of the score for summary file
- for (int t = 0; t < numComp; t++) {
- float rcumul = 1.0000;
- for (it = validScores[t].begin(); it != validScores[t].end(); it++) {
- //make rscoreFreq map and rCumul
- it2 = rscoreFreq[t].find(it->first);
- rCumul[t][it->first] = rcumul;
- //get percentage of random trees with that info
- if (it2 != rscoreFreq[t].end()) { rscoreFreq[t][it->first] /= iters; rcumul-= it2->second; }
- else { rscoreFreq[t][it->first] = 0.0000; } //no random trees with that score
- }
- }
-
- //save the signifigance of the users score for printing later
for (int f = 0; f < numComp; f++) {
- WScoreSig.push_back(rCumul[f][userData[f]]);
- }
+ //sort random scores
+ sort(rScores[f].begin(), rScores[f].end());
+
+ //the index of the score higher than yours is returned
+ //so if you have 1000 random trees the index returned is 100
+ //then there are 900 trees with a score greater then you.
+ //giving you a signifigance of 0.900
+ int index = findIndex(userData[f]); if (index == -1) { cout << "error in UnifracWeightedCommand" << endl; exit(1); } //error code
+ //the signifigance is the number of trees with the users score or higher
+ WScoreSig.push_back((iters-index)/(float)iters);
+ }
- //clear random data
- rscoreFreq.clear();
- rCumul.clear();
- }
-
- rCumul.resize(numComp);
- for (int b = 0; b < numComp; b++) {
- float ucumul = 1.0000;
- float rcumul = 1.0000;
- //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
- for (it = validScores[b].begin(); it != validScores[b].end(); it++) {
- it2 = uscoreFreq[b].find(it->first);
- //make uCumul map
- uCumul[b][it->first] = ucumul;
- //user data has that score
- if (it2 != uscoreFreq[b].end()) { uscoreFreq[b][it->first] /= T.size(); ucumul-= it2->second; }
- else { uscoreFreq[b][it->first] = 0.0000; } //no user trees with that score
+ out << "Tree# " << i << endl;
+ //printWeightedFile();
- //make rscoreFreq map and rCumul
- it2 = totalrscoreFreq[b].find(it->first);
- rCumul[b][it->first] = rcumul;
- //get percentage of random trees with that info
- if (it2 != totalrscoreFreq[b].end()) { totalrscoreFreq[b][it->first] /= (iters * T.size()); rcumul-= it2->second; }
- else { totalrscoreFreq[b][it->first] = 0.0000; } //no random trees with that score
- }
+ //clear data
+ rScores.clear();
+ uScores.clear();
+ validScores.clear();
}
- printWeightedFile();
printWSummaryFile();
//clear out users groups
exit(1);
}
}
-/***********************************************************/
+/***********************************************************
void UnifracWeightedCommand::printWeightedFile() {
try {
- //column headers
-
- out << "Group" << '\t' << "Score" << '\t' << "UserFreq" << '\t' << "UserCumul" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl;
-
+
//format output
out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
//for each group
for (int e = 0; e < numComp; e++) {
//print each line in that group
- for (it = validScores[e].begin(); it != validScores[e].end(); it++) {
- out << setprecision(6) << groupComb[e] << '\t' << it->first << '\t' << '\t' << uscoreFreq[e][it->first] << '\t' << uCumul[e][it->first] << '\t' << totalrscoreFreq[e][it->first] << '\t' << rCumul[e][it->first] << endl;
+ for (i = 0; i < validScores[e].size(); i++) {
+ out << setprecision(6) << groupComb[e] << '\t' << validScores[e][i] << '\t' << '\t' << uscoreFreq[e][it->first] << '\t' << uCumul[e][it->first] << '\t' << rscoreFreq[e][it->first] << '\t' << rCumul[e][it->first] << endl;
}
}
}
/***********************************************************/
-void UnifracWeightedCommand::saveRandomScores() {
+void UnifracWeightedCommand::removeValidScoresDuplicates() {
try {
for (int e = 0; e < numComp; e++) {
- //update total map with new random scores
- for (it = rscoreFreq[e].begin(); it != rscoreFreq[e].end(); it++) {
- //does this score already exist in the total map
- it2 = totalrscoreFreq[e].find(it->first);
- //if yes then add them
- if (it2 != totalrscoreFreq[e].end()) {
- totalrscoreFreq[e][it->first] += rscoreFreq[e][it->first];
- }else{ //its a new score
- totalrscoreFreq[e][it->first] = rscoreFreq[e][it->first];
- }
+ //sort valid scores
+ sort(validScores[e].begin(), validScores[e].end());
+
+ for (int i = 0; i< validScores[e].size()-1; i++) {
+ if (validScores[e][i] == validScores[e][i+1]) { validScores[e].erase(validScores[e].begin()+i); }
+ }
+ }
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function removeValidScoresDuplicates. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+ catch(...) {
+ cout << "An unknown error has occurred in the UnifracWeightedCommand class function removeValidScoresDuplicates. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+}
+
+/***********************************************************/
+int UnifracWeightedCommand::findIndex(float score) {
+ try{
+ for (int e = 0; e < numComp; e++) {
+ for (int i = 0; i < rScores[e].size(); i++) {
+//cout << rScores[e][i] << " number " << i << endl;
+ if (rScores[e][i] >= score) { return i; }
}
}
+ return -1;
}
catch(exception& e) {
- cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function saveRandomScores. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
exit(1);
}
catch(...) {
- cout << "An unknown error has occurred in the UnifracWeightedCommand class function saveRandomScores. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ cout << "An unknown error has occurred in the UnifracWeightedCommand class function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
exit(1);
}
}
int iters, numGroups, numComp;
EstOutput userData; //weighted score info for user tree
EstOutput randomData; //weighted score info for random trees
- vector< map<float, float> > validScores; //vector<contains scores from both user and random> each group comb has an entry
- vector< map<float, float> > rscoreFreq; //vector<weighted score, number of random trees with that score.> each group comb has an entry
- vector< map<float, float> > uscoreFreq; //vector<weighted, number of user trees with that score.> each group comb has an entry
- vector< map<float, float> > totalrscoreFreq; //vector<weighted score, number of random trees with that score.> each group comb has an entry
- vector< map<float, float> > rCumul; //vector<weighted score, number of random trees with that score or higher.> each group comb has an entry
- vector< map<float, float> > uCumul; //vector<weighted, cumulative percentage of number of user trees with that score or higher.> each group comb has an entry
- map<float, float>::iterator it;
- map<float, float>::iterator it2;
-
+ vector< vector<float> > validScores; //vector<contains scores from both user and random> each group comb has an entry
+ vector< vector<float> > rScores; //vector<weighted scores for random trees.> each group comb has an entry
+ vector< vector<float> > uScores; //vector<weighted scores for user trees.> each group comb has an entry
+
ofstream outSum, out;
void printWSummaryFile();
- void printWeightedFile();
- void saveRandomScores();
+ // void printWeightedFile();
+ void removeValidScoresDuplicates();
+ int findIndex(float);
void setGroups();
};
Unweighted(TreeMap* t) : tmap(t) {};
~Unweighted() {};
EstOutput getValues(Tree*);
+ EstOutput getValues(Tree*, string, string) { return data; };
private:
GlobalData* globaldata;
}
-/**************************************************************************************************/
\ No newline at end of file
+/**************************************************************************************************/
+EstOutput Weighted::getValues(Tree* t, string groupA, string groupB) {
+ try {
+ globaldata = GlobalData::getInstance();
+
+ data.clear(); //clear out old values
+
+ //initialize weighted score
+ WScore[(groupA+groupB)] = 0.0;
+ float D = 0.0;
+
+
+ /********************************************************/
+ //calculate a D value for the group combo
+ for(int v=0;v<t->getNumLeaves();v++){
+ int index = v;
+ double sum = 0.0000;
+
+ //while you aren't at root
+ while(t->tree[index].getParent() != -1){
+
+ //if you have a BL
+ if(t->tree[index].getBranchLength() != -1){
+ sum += t->tree[index].getBranchLength();
+ }
+ index = t->tree[index].getParent();
+ }
+
+ //get last breanch length added
+ if(t->tree[index].getBranchLength() != -1){
+ sum += t->tree[index].getBranchLength();
+ }
+
+ if ((t->tree[v].getGroup() == groupA) || (t->tree[v].getGroup() == groupB)) {
+ sum /= (double)tmap->seqsPerGroup[t->tree[v].getGroup()];
+ D += sum;
+ }
+ }
+ /********************************************************/
+
+ //calculate u for the group comb
+ for(int i=0;i<t->getNumNodes();i++){
+ double u;
+ //does this node have descendants from groupA
+ it = t->tree[i].pcount.find(groupA);
+ //if it does u = # of its descendants with a certain group / total number in tree with a certain group
+ if (it != t->tree[i].pcount.end()) {
+ u = (double) t->tree[i].pcount[groupA] / (double) tmap->seqsPerGroup[groupA];
+ }else { u = 0.00; }
+
+
+ //does this node have descendants from group l
+ it = t->tree[i].pcount.find(groupB);
+ //if it does subtract their percentage from u
+ if (it != t->tree[i].pcount.end()) {
+ u -= (double) t->tree[i].pcount[groupB] / (double) tmap->seqsPerGroup[groupB];
+ }
+
+ u = abs(u) * t->tree[i].getBranchLength();
+
+ //save groupcombs u value
+ WScore[(groupA+groupB)] += u;
+ }
+
+ /********************************************************/
+
+ //calculate weighted score for the group combination
+ double UN;
+ UN = (WScore[(groupA+groupB)] / D);
+
+ if (isnan(UN) || isinf(UN)) { UN = 0; }
+ data.push_back(UN);
+
+ return data;
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the Weighted class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+ catch(...) {
+ cout << "An unknown error has occurred in the Weighted class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+
+}
+
+
+
+
+
+
+
+
+
Weighted(TreeMap* t) : tmap(t) {};
~Weighted() {};
EstOutput getValues(Tree*);
+ EstOutput getValues(Tree*, string, string);
private:
GlobalData* globaldata;