From 5318771152ee5b1a280b3dc50aedd887dd1da8a8 Mon Sep 17 00:00:00 2001 From: westcott Date: Thu, 26 Feb 2009 21:04:36 +0000 Subject: [PATCH] parsimony with groups --- parsimony.cpp | 85 ++++++++++++++--- parsimony.h | 1 + parsimonycommand.cpp | 190 +++++++++++++++++++++++-------------- parsimonycommand.h | 21 ++-- tree.cpp | 6 +- tree.h | 2 +- unifracweightedcommand.cpp | 4 - unifracweightedcommand.h | 10 +- weighted.h | 2 +- 9 files changed, 213 insertions(+), 108 deletions(-) diff --git a/parsimony.cpp b/parsimony.cpp index 3840514..b2809ed 100644 --- a/parsimony.cpp +++ b/parsimony.cpp @@ -15,22 +15,86 @@ EstOutput Parsimony::getValues(Tree* t) { try { globaldata = GlobalData::getInstance(); - data.resize(1,0); + copyTree = new Tree(); + + //if the users enters no groups then give them the score of all groups + int numGroups = globaldata->Groups.size(); + if (numGroups == 0) { + numGroups++; + for (int i = 0; i < tmap->namesOfGroups.size(); i++) { + globaldata->Groups.push_back(tmap->namesOfGroups[i]); + } + } + + //calculate number of comparsions + int numComp = 0; + for (int r=0; r groups; + + int count = 0; + for (int a=0; aGroups[a]); groups.push_back(globaldata->Groups[l]); + + //copy users tree so that you can redo pgroups + copyTree->getCopy(t); + + //create pgroups that reflect the groups the user want to use + for(int i=copyTree->getNumLeaves();igetNumNodes();i++){ + copyTree->tree[i].pGroups = (copyTree->mergeUserGroups(i, groups)); + } + + for(int i=copyTree->getNumLeaves();igetNumNodes();i++){ + int lc = copyTree->tree[i].getLChild(); + int rc = copyTree->tree[i].getRChild(); + int iSize = copyTree->tree[i].pGroups.size(); + int rcSize = copyTree->tree[rc].pGroups.size(); + int lcSize = copyTree->tree[lc].pGroups.size(); + + //if isize are 0 then that branch is to be ignored + if (iSize == 0) { } + else if ((rcSize == 0) || (lcSize == 0)) { } + //if you have more groups than either of your kids then theres been a change. + else if(iSize > rcSize || iSize > lcSize){ + score++; + } + } + + data[count] = score; + count++; + groups.clear(); + } + } + + //get score for all users groups + + //copy users tree so that you can redo pgroups + copyTree->getCopy(t); int score = 0; //create pgroups that reflect the groups the user want to use - for(int i=t->getNumLeaves();igetNumNodes();i++){ - t->tree[i].pGroups = (t->mergeUserGroups(i)); + for(int i=copyTree->getNumLeaves();igetNumNodes();i++){ + copyTree->tree[i].pGroups = (copyTree->mergeUserGroups(i, globaldata->Groups)); } - for(int i=t->getNumLeaves();igetNumNodes();i++){ - int lc = t->tree[i].getLChild(); - int rc = t->tree[i].getRChild(); + for(int i=copyTree->getNumLeaves();igetNumNodes();i++){ + int lc = copyTree->tree[i].getLChild(); + int rc = copyTree->tree[i].getRChild(); - int iSize = t->tree[i].pGroups.size(); - int rcSize = t->tree[rc].pGroups.size(); - int lcSize = t->tree[lc].pGroups.size(); + int iSize = copyTree->tree[i].pGroups.size(); + int rcSize = copyTree->tree[rc].pGroups.size(); + int lcSize = copyTree->tree[lc].pGroups.size(); //if isize are 0 then that branch is to be ignored if (iSize == 0) { } @@ -41,8 +105,7 @@ EstOutput Parsimony::getValues(Tree* t) { } } - - data[0] = score; + data[count] = score; return data; } diff --git a/parsimony.h b/parsimony.h index 11d4cc8..a22f5bd 100644 --- a/parsimony.h +++ b/parsimony.h @@ -27,6 +27,7 @@ class Parsimony : public TreeCalculator { private: GlobalData* globaldata; + Tree* copyTree; EstOutput data; TreeMap* tmap; map::iterator it; diff --git a/parsimonycommand.cpp b/parsimonycommand.cpp index 1a493bd..e741b5e 100644 --- a/parsimonycommand.cpp +++ b/parsimonycommand.cpp @@ -51,31 +51,39 @@ ParsimonyCommand::ParsimonyCommand() { int ParsimonyCommand::execute() { try { - //get pscore for users tree - userData.resize(1,0); //data[0] = pscore. - randomData.resize(1,0); //data[0] = pscore. - + if (randomtree == "") { - copyUserTree = new Tree(); + //get pscore for users tree + userData.resize(numComp,0); //data = AB, AC, BC, ABC. + randomData.resize(numComp,0); //data = AB, AC, BC, ABC. + rscoreFreq.resize(numComp); + uscoreFreq.resize(numComp); + rCumul.resize(numComp); + uCumul.resize(numComp); + validScores.resize(numComp); + userTreeScores.resize(numComp); + UScoreSig.resize(numComp); + //get pscores for users trees for (int i = 0; i < T.size(); i++) { - //copy users tree so that you can redo pgroups - copyUserTree->getCopy(T[i]); cout << "Processing tree " << i+1 << endl; - userData = pars->getValues(copyUserTree); //userData[0] = pscore - cout << "Tree " << i+1 << " parsimony score = " << userData[0] << endl; - //update uscoreFreq - it = uscoreFreq.find(userData[0]); - if (it == uscoreFreq.end()) {//new score - uscoreFreq[userData[0]] = 1; - }else{ uscoreFreq[userData[0]]++; } - - //add users score to valid scores - validScores[userData[0]] = userData[0]; + userData = pars->getValues(T[i]); //data = AB, AC, BC, ABC. - //save score for summary file - userTreeScores.push_back(userData[0]); - + //output scores for each combination + for(int k = 0; k < numComp; k++) { + cout << "Tree " << i+1 << " Combination " << groupComb[k] << " parsimony score = " << userData[k] << endl; + //update uscoreFreq + it = uscoreFreq[k].find(userData[k]); + if (it == uscoreFreq[k].end()) {//new score + uscoreFreq[k][userData[k]] = 1; + }else{ uscoreFreq[k][userData[k]]++; } + + //add users score to valid scores + validScores[k][userData[k]] = userData[k]; + + //save score for summary file + userTreeScores[k].push_back(userData[k]); + } } //get pscores for random trees @@ -86,17 +94,19 @@ int ParsimonyCommand::execute() { randT->assembleRandomTree(); //get pscore of random tree randomData = pars->getValues(randT); + + for(int r = 0; r < numComp; r++) { + //add trees pscore to map of scores + it2 = rscoreFreq[r].find(randomData[r]); + if (it2 != rscoreFreq[r].end()) {//already have that score + rscoreFreq[r][randomData[r]]++; + }else{//first time we have seen this score + rscoreFreq[r][randomData[r]] = 1; + } - //add trees pscore to map of scores - it2 = rscoreFreq.find(randomData[0]); - if (it2 != rscoreFreq.end()) {//already have that score - rscoreFreq[randomData[0]]++; - }else{//first time we have seen this score - rscoreFreq[randomData[0]] = 1; + //add randoms score to validscores + validScores[r][randomData[r]] = randomData[r]; } - - //add randoms score to validscores - validScores[randomData[0]] = randomData[0]; delete randT; } @@ -109,18 +119,20 @@ int ParsimonyCommand::execute() { randT->assembleRandomTree(); //get pscore of random tree randomData = pars->getValues(randT); + + for(int r = 0; r < numComp; r++) { + //add trees pscore to map of scores + it2 = rscoreFreq[r].find(randomData[r]); + if (it2 != rscoreFreq[r].end()) {//already have that score + rscoreFreq[r][randomData[r]]++; + }else{//first time we have seen this score + rscoreFreq[r][randomData[r]] = 1; + } - //add trees pscore to map of scores - it2 = rscoreFreq.find(randomData[0]); - if (it2 != rscoreFreq.end()) {//already have that score - rscoreFreq[randomData[0]]++; - }else{//first time we have seen this score - rscoreFreq[randomData[0]] = 1; + //add randoms score to validscores + validScores[r][randomData[r]] = randomData[r]; } - - //add randoms score to validscores - validScores[randomData[0]] = randomData[0]; - + delete randT; } } @@ -128,30 +140,32 @@ int ParsimonyCommand::execute() { float rcumul = 0.0000; float ucumul = 0.0000; + for(int a = 0; a < numComp; a++) { //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print. - for (it = validScores.begin(); it != validScores.end(); it++) { - if (randomtree == "") { - it2 = uscoreFreq.find(it->first); - //user data has that score - if (it2 != uscoreFreq.end()) { uscoreFreq[it->first] /= T.size(); ucumul+= it2->second; } - else { uscoreFreq[it->first] = 0.0000; } //no user trees with that score - //make uCumul map - uCumul[it->first] = ucumul; + for (it = validScores[a].begin(); it != validScores[a].end(); it++) { + if (randomtree == "") { + it2 = uscoreFreq[a].find(it->first); + //user data has that score + if (it2 != uscoreFreq[a].end()) { uscoreFreq[a][it->first] /= T.size(); ucumul+= it2->second; } + else { uscoreFreq[a][it->first] = 0.0000; } //no user trees with that score + //make uCumul map + uCumul[a][it->first] = ucumul-a; + } + + //make rscoreFreq map and rCumul + it2 = rscoreFreq[a].find(it->first); + //get percentage of random trees with that info + if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul+= it2->second; } + else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score + rCumul[a][it->first] = rcumul-a; } - //make rscoreFreq map and rCumul - it2 = rscoreFreq.find(it->first); - //get percentage of random trees with that info - if (it2 != rscoreFreq.end()) { rscoreFreq[it->first] /= iters; rcumul+= it2->second; } - else { rscoreFreq[it->first] = 0.0000; } //no random trees with that score - rCumul[it->first] = rcumul; + //find the signifigance of each user trees score when compared to the random trees and save for printing the summary file + for (int h = 0; h < userTreeScores[a].size(); h++) { + UScoreSig[a].push_back(rCumul[a][userTreeScores[a][h]]); + } } - //find the signifigance of each user trees score when compared to the random trees and save for printing the summary file - for (int h = 0; h < userTreeScores.size(); h++) { - UScoreSig.push_back(rCumul[userTreeScores[h]]); - } - printParsimonyFile(); printUSummaryFile(); @@ -181,23 +195,24 @@ void ParsimonyCommand::printParsimonyFile() { try { //column headers if (randomtree == "") { - out << "Score" << '\t' << "UserFreq" << '\t' << "UserCumul" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl; + out << "Comb" << '\t' << "Score" << '\t' << "UserFreq" << '\t' << "UserCumul" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl; }else { - out << "Score" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl; + out << "Comb" << '\t' << "Score" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl; } //format output out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); - //print each line - for (it = validScores.begin(); it != validScores.end(); it++) { - if (randomtree == "") { - out << setprecision(6) << it->first << '\t' << '\t' << uscoreFreq[it->first] << '\t' << uCumul[it->first] << '\t' << rscoreFreq[it->first] << '\t' << rCumul[it->first] << endl; - }else{ - out << setprecision(6) << it->first << '\t' << '\t' << rscoreFreq[it->first] << '\t' << rCumul[it->first] << endl; - } - } - + for(int a = 0; a < numComp; a++) { + //print each line + for (it = validScores[a].begin(); it != validScores[a].end(); it++) { + if (randomtree == "") { + out << setprecision(6) << groupComb[a] << '\t' << it->first << '\t' << '\t'<< uscoreFreq[a][it->first] << '\t' << uCumul[a][it->first] << '\t' << rscoreFreq[a][it->first] << '\t' << rCumul[a][it->first] << endl; + }else{ + out << setprecision(6) << groupComb[a] << '\t' << it->first << '\t' << '\t' << rscoreFreq[a][it->first] << '\t' << rCumul[a][it->first] << endl; + } + } + } out.close(); } @@ -214,14 +229,17 @@ void ParsimonyCommand::printParsimonyFile() { void ParsimonyCommand::printUSummaryFile() { try { //column headers - outSum << "Tree#" << '\t' << "ParsScore" << '\t' << '\t' << "ParsSig" << endl; + outSum << "Tree#" << '\t' << "Comb" << '\t' << "ParsScore" << '\t' << '\t' << "ParsSig" << endl; //format output outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint); + //print each line for (int i = 0; i< T.size(); i++) { - outSum << setprecision(6) << i+1 << '\t' << '\t' << userTreeScores[i] << '\t' << UScoreSig[i] << endl; + for(int a = 0; a < numComp; a++) { + outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << '\t' << userTreeScores[a][i] << '\t' << UScoreSig[a][i] << endl; + } } outSum.close(); @@ -286,6 +304,8 @@ void ParsimonyCommand::getUserInput() { void ParsimonyCommand::setGroups() { try { + string allGroups = ""; + numGroups = 0; //if the user has not entered specific groups to analyze then do them all if (globaldata->Groups.size() != 0) { if (globaldata->Groups[0] != "all") { @@ -294,7 +314,7 @@ void ParsimonyCommand::setGroups() { if (tmap->isValidGroup(globaldata->Groups[i]) != true) { cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl; // erase the invalid group from globaldata->Groups - globaldata->Groups.erase (globaldata->Groups.begin()+i); + globaldata->Groups.erase(globaldata->Groups.begin()+i); } } @@ -303,19 +323,43 @@ void ParsimonyCommand::setGroups() { cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl; for (int i = 0; i < tmap->namesOfGroups.size(); i++) { globaldata->Groups.push_back(tmap->namesOfGroups[i]); + numGroups++; + allGroups += tmap->namesOfGroups[i]; + } + }else { + for (int i = 0; i < globaldata->Groups.size(); i++) { + allGroups += tmap->namesOfGroups[i]; + numGroups++; } } }else{//user has enter "all" and wants the default groups for (int i = 0; i < tmap->namesOfGroups.size(); i++) { globaldata->Groups.push_back(tmap->namesOfGroups[i]); + numGroups++; + allGroups += tmap->namesOfGroups[i]; } globaldata->setGroups(""); } }else { for (int i = 0; i < tmap->namesOfGroups.size(); i++) { - globaldata->Groups.push_back(tmap->namesOfGroups[i]); + allGroups += tmap->namesOfGroups[i]; + } + numGroups = 1; + } + + //calculate number of comparsions + numComp = 0; + for (int r=0; rGroups[r]+globaldata->Groups[l]); + numComp++; } } + + //ABC + groupComb.push_back(allGroups); + numComp++; + } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; diff --git a/parsimonycommand.h b/parsimonycommand.h index ba30d40..db062c6 100644 --- a/parsimonycommand.h +++ b/parsimonycommand.h @@ -32,20 +32,21 @@ class ParsimonyCommand : public Command { TreeMap* tmap; TreeMap* savetmap; Parsimony* pars; + vector groupComb; // AB. AC, BC... string parsFile, sumFile, randomtree; - int iters, numGroups; + int iters, numGroups, numComp; vector numEachGroup; //vector containing the number of sequences in each group the users wants for random distrib. - vector userTreeScores; //scores for users trees - vector UScoreSig; //tree score signifigance when compared to random trees - percentage of random trees with that score or lower. + vector< vector > userTreeScores; //scores for users trees for each comb. + vector< vector > UScoreSig; //tree score signifigance when compared to random trees - percentage of random trees with that score or lower. EstOutput userData; //pscore info for user tree EstOutput randomData; //pscore info for random trees - map validScores; //contains scores from both user and random - map rscoreFreq; //pscore, number of random trees with that score. - map uscoreFreq; //pscore, number of user trees with that score. - map rCumul; //pscore, cumulative percentage of number of random trees with that score or lower. - map uCumul; //pscore, cumulative percentage of number of user trees with that score or lower . - map::iterator it; - map::iterator it2; + vector< map > validScores; //map contains scores from both user and random + vector< map > rscoreFreq; //map -vector entry for each combination. + vector< map > uscoreFreq; //map -vector entry for each combination. + vector< map > rCumul; //map -vector entry for each combination. + vector< map > uCumul; //map -vector entry for each combination. + map::iterator it; + map::iterator it2; ofstream out, outSum; diff --git a/tree.cpp b/tree.cpp index 3b3b684..5029438 100644 --- a/tree.cpp +++ b/tree.cpp @@ -211,7 +211,7 @@ map Tree::mergeGroups(int i) { // p[white] = 1 and p[black] = 1. Now go up a level and merge that with a node who has p[white] = 1 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1. -map Tree::mergeUserGroups(int i) { +map Tree::mergeUserGroups(int i, vector g) { try { int lc = tree[i].getLChild(); @@ -219,12 +219,12 @@ map Tree::mergeUserGroups(int i) { //loop through nodes groups removing the ones the user doesn't want for (it = tree[lc].pGroups.begin(); it != tree[lc].pGroups.end(); it++) { - if (inUsersGroups(it->first, globaldata->Groups) != true) { tree[lc].pGroups.erase(it->first); } + if (inUsersGroups(it->first, g) != true) { tree[lc].pGroups.erase(it->first); } } //loop through nodes groups removing the ones the user doesn't want for (it = tree[rc].pGroups.begin(); it != tree[rc].pGroups.end(); it++) { - if (inUsersGroups(it->first, globaldata->Groups) != true) { tree[rc].pGroups.erase(it->first); } + if (inUsersGroups(it->first, g) != true) { tree[rc].pGroups.erase(it->first); } } //set parsimony groups to left child diff --git a/tree.h b/tree.h index b0ae873..733f3a0 100644 --- a/tree.h +++ b/tree.h @@ -32,7 +32,7 @@ public: void setIndex(string, int); int getNumNodes() { return numNodes; } int getNumLeaves(){ return numLeaves; } - map mergeUserGroups(int); //returns a map with a groupname and the number of times that group was seen in the children + map mergeUserGroups(int, vector); //returns a map with a groupname and the number of times that group was seen in the children void printTree(); //this function takes the leaf info and populates the non leaf nodes diff --git a/unifracweightedcommand.cpp b/unifracweightedcommand.cpp index a99459d..f7ad525 100644 --- a/unifracweightedcommand.cpp +++ b/unifracweightedcommand.cpp @@ -72,7 +72,6 @@ int UnifracWeightedCommand::execute() { //get scores for random trees for (int j = 0; j < iters; j++) { -// int n = 1; int count = 0; for (int r=0; rgetValues(randT, tmap->namesOfGroups[r], tmap->namesOfGroups[l]); } -// randT->createNewickFile("hold"+toString(r)+toString(l)+toString(j)); - //save scores rScores[count].push_back(randomData[0]); validScores[count][randomData[0]] = randomData[0]; count++; } -// n++; } } diff --git a/unifracweightedcommand.h b/unifracweightedcommand.h index e8daaef..4d8f662 100644 --- a/unifracweightedcommand.h +++ b/unifracweightedcommand.h @@ -28,8 +28,8 @@ class UnifracWeightedCommand : public Command { private: GlobalData* globaldata; vector T; //user trees - vector utreeScores; //user tree unweighted scores - vector WScoreSig; //tree weighted score signifigance when compared to random trees - percentage of random trees with that score or lower. + vector utreeScores; //user tree unweighted scores + vector WScoreSig; //tree weighted score signifigance when compared to random trees - percentage of random trees with that score or lower. vector groupComb; // AB. AC, BC... Tree* randT; //random tree TreeMap* tmap; @@ -38,9 +38,9 @@ class UnifracWeightedCommand : public Command { int iters, numGroups, numComp; EstOutput userData; //weighted score info for user tree EstOutput randomData; //weighted score info for random trees - vector< vector > validScores; //vector each group comb has an entry - vector< vector > rScores; //vector each group comb has an entry - vector< vector > uScores; //vector each group comb has an entry + vector< vector > validScores; //vector each group comb has an entry + vector< vector > rScores; //vector each group comb has an entry + vector< vector > uScores; //vector each group comb has an entry ofstream outSum, out; diff --git a/weighted.h b/weighted.h index 41b738b..3fed6f5 100644 --- a/weighted.h +++ b/weighted.h @@ -29,7 +29,7 @@ class Weighted : public TreeCalculator { EstOutput data; TreeMap* tmap; map::iterator it; - map WScore; //a score for each group combination i.e. AB, AC, BC. + map WScore; //a score for each group combination i.e. AB, AC, BC. }; /***********************************************************************/ -- 2.39.2