From: westcott Date: Fri, 27 Feb 2009 15:38:16 +0000 (+0000) Subject: fixed parsimony with groups and worked on unifrac.unweighted with groups X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=fc7cf3aac8fd6106fd725b43baa8ab5ca6f836f8 fixed parsimony with groups and worked on unifrac.unweighted with groups --- diff --git a/parsimony.cpp b/parsimony.cpp index b2809ed..0dea64e 100644 --- a/parsimony.cpp +++ b/parsimony.cpp @@ -14,17 +14,12 @@ EstOutput Parsimony::getValues(Tree* t) { try { globaldata = GlobalData::getInstance(); + vector groups; copyTree = new Tree(); //if the users enters no groups then give them the score of all groups int numGroups = globaldata->Groups.size(); - if (numGroups == 0) { - numGroups++; - for (int i = 0; i < tmap->namesOfGroups.size(); i++) { - globaldata->Groups.push_back(tmap->namesOfGroups[i]); - } - } //calculate number of comparsions int numComp = 0; @@ -36,7 +31,6 @@ EstOutput Parsimony::getValues(Tree* t) { //numComp+1 for AB, AC, BC, ABC data.resize(numComp+1,0); - vector groups; int count = 0; for (int a=0; agetCopy(t); - int score = 0; + if (numComp != 1) { + if (numGroups == 0) { + //get score for all users groups + for (int i = 0; i < tmap->namesOfGroups.size(); i++) { + groups.push_back(tmap->namesOfGroups[i]); + } + }else { + for (int i = 0; i < globaldata->Groups.size(); i++) { + groups.push_back(globaldata->Groups[i]); + } + } + + //copy users tree so that you can redo pgroups + copyTree->getCopy(t); + int score = 0; - //create pgroups that reflect the groups the user want to use - for(int i=copyTree->getNumLeaves();igetNumNodes();i++){ - copyTree->tree[i].pGroups = (copyTree->mergeUserGroups(i, globaldata->Groups)); - } + //create pgroups that reflect the groups the user want to use + for(int i=copyTree->getNumLeaves();igetNumNodes();i++){ + copyTree->tree[i].pGroups = (copyTree->mergeUserGroups(i, groups)); + } - for(int i=copyTree->getNumLeaves();igetNumNodes();i++){ - int lc = copyTree->tree[i].getLChild(); - int rc = copyTree->tree[i].getRChild(); + for(int i=copyTree->getNumLeaves();igetNumNodes();i++){ + int lc = copyTree->tree[i].getLChild(); + int rc = copyTree->tree[i].getRChild(); - int iSize = copyTree->tree[i].pGroups.size(); - int rcSize = copyTree->tree[rc].pGroups.size(); - int lcSize = copyTree->tree[lc].pGroups.size(); + int iSize = copyTree->tree[i].pGroups.size(); + int rcSize = copyTree->tree[rc].pGroups.size(); + int lcSize = copyTree->tree[lc].pGroups.size(); - //if isize are 0 then that branch is to be ignored - if (iSize == 0) { } - else if ((rcSize == 0) || (lcSize == 0)) { } - //if you have more groups than either of your kids then theres been a change. - else if(iSize > rcSize || iSize > lcSize){ - score++; - } - } + //if isize are 0 then that branch is to be ignored + if (iSize == 0) { } + else if ((rcSize == 0) || (lcSize == 0)) { } + //if you have more groups than either of your kids then theres been a change. + else if(iSize > rcSize || iSize > lcSize){ + score++; + } + } - data[count] = score; + data[count] = score; + } return data; } diff --git a/parsimonycommand.cpp b/parsimonycommand.cpp index c40dde0..a527a18 100644 --- a/parsimonycommand.cpp +++ b/parsimonycommand.cpp @@ -70,7 +70,6 @@ int ParsimonyCommand::execute() { //output scores for each combination for(int k = 0; k < numComp; k++) { - cout << "Tree " << i+1 << " Combination " << groupComb[k] << " parsimony score = " << userData[k] << endl; //update uscoreFreq it = uscoreFreq[k].find(userData[k]); if (it == uscoreFreq[k].end()) {//new score @@ -166,10 +165,10 @@ int ParsimonyCommand::execute() { } printParsimonyFile(); - if (randomtree != "") { printUSummaryFile(); } + if (randomtree == "") { printUSummaryFile(); } //reset globaldata's treemap if you just did random distrib - if (randomtree == "") { globaldata->gTreemap = savetmap; } + if (randomtree != "") { globaldata->gTreemap = savetmap; } //reset randomTree parameter to "" globaldata->setRandomTree(""); @@ -238,6 +237,7 @@ void ParsimonyCommand::printUSummaryFile() { for (int i = 0; i< T.size(); i++) { for(int a = 0; a < numComp; a++) { outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << '\t' << userTreeScores[a][i] << '\t' << UScoreSig[a][i] << endl; + cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << '\t' << userTreeScores[a][i] << '\t' << UScoreSig[a][i] << endl; } } @@ -357,8 +357,10 @@ void ParsimonyCommand::setGroups() { } //ABC - groupComb.push_back(allGroups); - numComp++; + if (numComp != 1) { + groupComb.push_back(allGroups); + numComp++; + } } catch(exception& e) { diff --git a/unifracunweightedcommand.cpp b/unifracunweightedcommand.cpp index 9a1ad0c..c345ca1 100644 --- a/unifracunweightedcommand.cpp +++ b/unifracunweightedcommand.cpp @@ -18,10 +18,14 @@ UnifracUnweightedCommand::UnifracUnweightedCommand() { tmap = globaldata->gTreemap; unweightedFile = globaldata->getTreeFile() + ".unweighted"; openOutputFile(unweightedFile, out); + //column headers + out << "Comb" << '\t' << "Score" << '\t' << "UserFreq" << '\t' << "UserCumul" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl; + sumFile = globaldata->getTreeFile() + ".uwsummary"; openOutputFile(sumFile, outSum); - distFile = globaldata->getTreeFile() + ".uwdistrib"; - openOutputFile(distFile, outDist); + //column headers + outSum << "Tree#" << '\t' << "Comb" << '\t' << "UWScore" << '\t' << '\t' << "UWSig" << endl; + setGroups(); //sets users groups to analyze convert(globaldata->getIters(), iters); //how many random trees to generate unweighted = new Unweighted(tmap); @@ -39,112 +43,104 @@ UnifracUnweightedCommand::UnifracUnweightedCommand() { /***********************************************************/ int UnifracUnweightedCommand::execute() { try { - - //get unweighted for users tree - userData.resize(1,0); //data[0] = unweightedscore - randomData.resize(1,0); //data[0] = unweightedscore - - //format output - outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint); - - outDist << "Groups Used "; - for (int m = 0; m < globaldata->Groups.size(); m++) { - outDist << globaldata->Groups[m] << " "; - } - outDist << endl; - - outDist << "Tree#" << '\t' << "Iter" << '\t' << "UWScore" << endl; - + + userData.resize(numComp,0); //data[0] = unweightedscore + randomData.resize(numComp,0); //data[0] = unweightedscore //create new tree with same num nodes and leaves as users randT = new Tree(); //get pscores for users trees for (int i = 0; i < T.size(); i++) { + //get unweighted for users tree + rscoreFreq.resize(numComp); + uscoreFreq.resize(numComp); + rCumul.resize(numComp); + uCumul.resize(numComp); + validScores.resize(numComp); + utreeScores.resize(numComp); + UWScoreSig.resize(numComp); + cout << "Processing tree " << i+1 << endl; + outSum << "Tree#" << i+1 << endl; + out << "Tree#" << i+1 << endl; userData = unweighted->getValues(T[i]); //userData[0] = unweightedscore - //update uscoreFreq - it = uscoreFreq.find(userData[0]); - if (it == uscoreFreq.end()) {//new score - uscoreFreq[userData[0]] = 1; - }else{ uscoreFreq[userData[0]]++; } + //output scores for each combination + for(int k = 0; k < numComp; k++) { + //update uscoreFreq + it = uscoreFreq[k].find(userData[k]); + if (it == uscoreFreq[k].end()) {//new score + uscoreFreq[k][userData[k]] = 1; + }else{ uscoreFreq[k][userData[k]]++; } - //add users score to valid scores - validScores[userData[0]] = userData[0]; + //add users score to valid scores + validScores[k][userData[k]] = userData[k]; - //saves users score - utreeScores.push_back(userData[0]); + //saves users score + utreeScores[k].push_back(userData[k]); + } //copy T[i]'s info. randT->getCopy(T[i]); //get unweighted scores for random trees for (int j = 0; j < iters; j++) { - //create a random tree with same topology as T[i], but different labels - randT->assembleRandomUnifracTree(); - //get pscore of random tree - randomData = unweighted->getValues(randT); + int count = 0; + for (int r=0; rgetValues(randT, "", ""); - //add trees unweighted score to map of scores - it2 = rscoreFreq.find(randomData[0]); - if (it2 != rscoreFreq.end()) {//already have that score - rscoreFreq[randomData[0]]++; - }else{//first time we have seen this score - rscoreFreq[randomData[0]] = 1; - } + //add trees unweighted score to map of scores + it2 = rscoreFreq[count].find(randomData[count]); + if (it2 != rscoreFreq[count].end()) {//already have that score + rscoreFreq[count][randomData[count]]++; + }else{//first time we have seen this score + rscoreFreq[count][randomData[count]] = 1; + } - //add randoms score to validscores - validScores[randomData[0]] = randomData[0]; - - //output info to uwdistrib file - outDist << i+1 << '\t' << '\t'<< j+1 << '\t' << '\t' << randomData[0] << endl; + //add randoms score to validscores + validScores[count][randomData[count]] = randomData[count]; + count++; + } + } } - - saveRandomScores(); //save all random scores for unweighted file - - //find the signifigance of the score + + for(int a = 0; a < numComp; a++) { + float ucumul = 1.0000; float rcumul = 1.0000; - for (it = rscoreFreq.begin(); it != rscoreFreq.end(); it++) { - rCumul[it->first] = rcumul; + //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print. + for (it = validScores[a].begin(); it != validScores[a].end(); it++) { + it2 = uscoreFreq[a].find(it->first); + //make uCumul map + uCumul[a][it->first] = ucumul; + //user data has that score + if (it2 != uscoreFreq[a].end()) { uscoreFreq[a][it->first] /= T.size(); ucumul-= it2->second; } + else { uscoreFreq[a][it->first] = 0.0000; } //no user trees with that score + + //make rscoreFreq map and rCumul + it2 = rscoreFreq[a].find(it->first); + rCumul[a][it->first] = rcumul; //get percentage of random trees with that info - rscoreFreq[it->first] /= iters; - rcumul-= it->second; + if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul-= it2->second; } + else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score } - - //save the signifigance of the users score for printing later - UWScoreSig.push_back(rCumul[userData[0]]); - - - //clear random data - rscoreFreq.clear(); //you clear this because in the summary file you want the unweighted signifinance to be relative to these 1000 trees. - rCumul.clear(); - } - - float ucumul = 1.0000; - float rcumul = 1.0000; - //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print. - for (it = validScores.begin(); it != validScores.end(); it++) { - it2 = uscoreFreq.find(it->first); - //make uCumul map - uCumul[it->first] = ucumul; - //user data has that score - if (it2 != uscoreFreq.end()) { uscoreFreq[it->first] /= T.size(); ucumul-= it2->second; } - else { uscoreFreq[it->first] = 0.0000; } //no user trees with that score - - //make rscoreFreq map and rCumul - it2 = totalrscoreFreq.find(it->first); - rCumul[it->first] = rcumul; - //get percentage of random trees with that info - if (it2 != totalrscoreFreq.end()) { totalrscoreFreq[it->first] /= (iters*T.size()); rcumul-= it2->second; } - else { totalrscoreFreq[it->first] = 0.0000; } //no random trees with that score - + UWScoreSig[a].push_back(rCumul[a][userData[a]]); } printUnweightedFile(); printUWSummaryFile(); + rscoreFreq.clear(); + uscoreFreq.clear(); + rCumul.clear(); + uCumul.clear(); + validScores.clear(); + utreeScores.clear(); + UWScoreSig.clear(); + } //reset groups parameter - globaldata->Groups.clear(); + globaldata->Groups.clear(); globaldata->setGroups(""); delete randT; @@ -163,24 +159,15 @@ int UnifracUnweightedCommand::execute() { /***********************************************************/ void UnifracUnweightedCommand::printUnweightedFile() { try { - //column headers - - out << "Groups Used "; - for (int m = 0; m < globaldata->Groups.size(); m++) { - out << globaldata->Groups[m] << " "; - } - out << endl; - - out << "Score" << '\t' << "UserFreq" << '\t' << "UserCumul" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl; - //format output out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); - //print each line - for (it = validScores.begin(); it != validScores.end(); it++) { - out << setprecision(6) << it->first << '\t' << '\t' << uscoreFreq[it->first] << '\t' << uCumul[it->first] << '\t' << totalrscoreFreq[it->first] << '\t' << rCumul[it->first] << endl; - } - + for(int a = 0; a < numComp; a++) { + //print each line + for (it = validScores[a].begin(); it != validScores[a].end(); it++) { + out << setprecision(6) << groupComb[a] << '\t' << it->first << '\t' << '\t' << uscoreFreq[a][it->first] << '\t' << uCumul[a][it->first] << '\t' << rscoreFreq[a][it->first] << '\t' << rCumul[a][it->first] << endl; + } + } out.close(); } @@ -197,22 +184,16 @@ void UnifracUnweightedCommand::printUnweightedFile() { /***********************************************************/ void UnifracUnweightedCommand::printUWSummaryFile() { try { - //column headers - - outSum << "Groups Used "; - for (int m = 0; m < globaldata->Groups.size(); m++) { - outSum << globaldata->Groups[m] << " "; - } - outSum << endl; - - outSum << "Tree#" << '\t' << "UWScore" << '\t' << '\t' << "UWSig" << endl; - + //format output outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint); //print each line for (int i = 0; i< T.size(); i++) { - outSum << setprecision(6) << i+1 << '\t' << '\t' << utreeScores[i] << '\t' << UWScoreSig[i] << endl; + for(int a = 0; a < numComp; a++) { + outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << '\t' << utreeScores[a][i] << '\t' << UWScoreSig[a][i] << endl; + cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << '\t' << utreeScores[a][i] << '\t' << UWScoreSig[a][i] << endl; + } } outSum.close(); @@ -226,57 +207,66 @@ void UnifracUnweightedCommand::printUWSummaryFile() { exit(1); } } -/***********************************************************/ -void UnifracUnweightedCommand::saveRandomScores() { - try { - for (it = rscoreFreq.begin(); it != rscoreFreq.end(); it++) { - //does this score already exist in the total map - it2 = totalrscoreFreq.find(it->first); - //if yes then add them - if (it2 != totalrscoreFreq.end()) { - totalrscoreFreq[it->first] += rscoreFreq[it->first]; - }else{ //its a new score - totalrscoreFreq[it->first] = rscoreFreq[it->first]; - } - } - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function saveRandomScores. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the UnifracUnweightedCommand class function saveRandomScores. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } -} - /***********************************************************/ void UnifracUnweightedCommand::setGroups() { try { + string allGroups = ""; + numGroups = 0; //if the user has not entered specific groups to analyze then do them all if (globaldata->Groups.size() != 0) { - //check that groups are valid - for (int i = 0; i < globaldata->Groups.size(); i++) { - if (tmap->isValidGroup(globaldata->Groups[i]) != true) { - cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl; - // erase the invalid group from globaldata->Groups - globaldata->Groups.erase (globaldata->Groups.begin()+i); + if (globaldata->Groups[0] != "all") { + //check that groups are valid + for (int i = 0; i < globaldata->Groups.size(); i++) { + if (tmap->isValidGroup(globaldata->Groups[i]) != true) { + cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl; + // erase the invalid group from globaldata->Groups + globaldata->Groups.erase(globaldata->Groups.begin()+i); + } } - } - //if the user only entered invalid groups - if (globaldata->Groups.size() == 0) { - cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl; + //if the user only entered invalid groups + if (globaldata->Groups.size() == 0) { + cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl; + for (int i = 0; i < tmap->namesOfGroups.size(); i++) { + globaldata->Groups.push_back(tmap->namesOfGroups[i]); + numGroups++; + allGroups += tmap->namesOfGroups[i]; + } + }else { + for (int i = 0; i < globaldata->Groups.size(); i++) { + allGroups += tmap->namesOfGroups[i]; + numGroups++; + } + } + }else{//user has enter "all" and wants the default groups for (int i = 0; i < tmap->namesOfGroups.size(); i++) { globaldata->Groups.push_back(tmap->namesOfGroups[i]); + numGroups++; + allGroups += tmap->namesOfGroups[i]; } + globaldata->setGroups(""); } - }else { for (int i = 0; i < tmap->namesOfGroups.size(); i++) { - globaldata->Groups.push_back(tmap->namesOfGroups[i]); + allGroups += tmap->namesOfGroups[i]; } + numGroups = 1; + } + + //calculate number of comparsions + numComp = 0; + for (int r=0; rGroups[r]+globaldata->Groups[l]); + numComp++; + } + } + + //ABC + if (numComp != 1) { + groupComb.push_back(allGroups); + numComp++; } } catch(exception& e) { diff --git a/unifracunweightedcommand.h b/unifracunweightedcommand.h index 7bf5338..e026bc2 100644 --- a/unifracunweightedcommand.h +++ b/unifracunweightedcommand.h @@ -28,29 +28,28 @@ class UnifracUnweightedCommand : public Command { private: GlobalData* globaldata; vector T; //user trees - vector utreeScores; //user tree unweighted scores - vector UWScoreSig; //tree unweighted score signifigance when compared to random trees - percentage of random trees with that score or lower. Tree* randT; //random tree TreeMap* tmap; Unweighted* unweighted; - string sumFile, distFile, unweightedFile; - int iters; + string sumFile, unweightedFile; + vector groupComb; // AB. AC, BC... + int iters, numGroups, numComp; EstOutput userData; //unweighted score info for user tree EstOutput randomData; //unweighted score info for random trees - map validScores; //contains scores from both user and random - map rscoreFreq; //unweighted score, number of random trees with that score. - map uscoreFreq; //unweighted, number of user trees with that score. - map totalrscoreFreq; //unweighted score, number of random trees with that score. - map rCumul; //unweighted score, cumulative percentage of number of random trees with that score or higher. - map uCumul; //unweighted, cumulative percentage of number of user trees with that score or higher . - map::iterator it; + vector< vector > utreeScores; //scores for users trees for each comb. + vector< vector > UWScoreSig; //tree score signifigance when compared to random trees - percentage of random trees with that score or higher. + vector< map > validScores; //map contains scores from both user and random + vector< map > rscoreFreq; //map -vector entry for each combination. + vector< map > uscoreFreq; //map -vector entry for each combination. + vector< map > rCumul; //map -vector entry for each combination. + vector< map > uCumul; //map -vector entry for each combination. map::iterator it; map::iterator it2; + map::iterator it; - ofstream outSum, outDist, out; + ofstream outSum, out; void printUWSummaryFile(); void printUnweightedFile(); - void saveRandomScores(); void setGroups(); }; diff --git a/unifracweightedcommand.cpp b/unifracweightedcommand.cpp index f7ad525..73eab59 100644 --- a/unifracweightedcommand.cpp +++ b/unifracweightedcommand.cpp @@ -16,10 +16,10 @@ UnifracWeightedCommand::UnifracWeightedCommand() { T = globaldata->gTree; tmap = globaldata->gTreemap; - weightedFile = globaldata->getTreeFile() + ".weighted"; - openOutputFile(weightedFile, out); + //weightedFile = globaldata->getTreeFile() + ".weighted"; + //openOutputFile(weightedFile, out); //column headers - out << "Group" << '\t' << "Score" << '\t' << "UserFreq" << '\t' << "UserCumul" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl; + //out << "Group" << '\t' << "Score" << '\t' << "UserFreq" << '\t' << "UserCumul" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl; sumFile = globaldata->getTreeFile() + ".wsummary"; openOutputFile(sumFile, outSum); @@ -78,17 +78,11 @@ int UnifracWeightedCommand::execute() { //copy T[i]'s info. randT->getCopy(T[i]); - if (globaldata->Groups.size() != 0) { - //create a random tree with same topology as T[i], but different labels - randT->assembleRandomUnifracTree(globaldata->Groups[r], globaldata->Groups[l]); - //get wscore of random tree - randomData = weighted->getValues(randT, globaldata->Groups[r], globaldata->Groups[l]); - }else { - //create a random tree with same topology as T[i], but different labels - randT->assembleRandomUnifracTree(tmap->namesOfGroups[r], tmap->namesOfGroups[l]); - //get wscore of random tree - randomData = weighted->getValues(randT, tmap->namesOfGroups[r], tmap->namesOfGroups[l]); - } + //create a random tree with same topology as T[i], but different labels + randT->assembleRandomUnifracTree(globaldata->Groups[r], globaldata->Groups[l]); + //get wscore of random tree + randomData = weighted->getValues(randT, globaldata->Groups[r], globaldata->Groups[l]); + //save scores rScores[count].push_back(randomData[0]); validScores[count][randomData[0]] = randomData[0]; @@ -113,7 +107,7 @@ int UnifracWeightedCommand::execute() { WScoreSig.push_back((iters-index)/(float)iters); } - out << "Tree# " << i << endl; + //out << "Tree# " << i << endl; //printWeightedFile(); //clear data @@ -247,6 +241,9 @@ void UnifracWeightedCommand::setGroups() { //if the user has not entered specific groups to analyze then do them all if (globaldata->Groups.size() == 0) { numGroups = tmap->getNumGroups(); + for (int i=0; i < numGroups; i++) { + globaldata->Groups.push_back(tmap->namesOfGroups[i]); + } }else { if (globaldata->getGroups() != "all") { //check that groups are valid @@ -261,33 +258,36 @@ void UnifracWeightedCommand::setGroups() { //if the user only entered invalid groups if (globaldata->Groups.size() == 0) { numGroups = tmap->getNumGroups(); + for (int i=0; i < numGroups; i++) { + globaldata->Groups.push_back(tmap->namesOfGroups[i]); + } cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl; }else if (globaldata->Groups.size() == 1) { cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl; numGroups = tmap->getNumGroups(); globaldata->Groups.clear(); + for (int i=0; i < numGroups; i++) { + globaldata->Groups.push_back(tmap->namesOfGroups[i]); + } }else { numGroups = globaldata->Groups.size(); } }else { //users wants all groups numGroups = tmap->getNumGroups(); globaldata->Groups.clear(); globaldata->setGroups(""); + for (int i=0; i < numGroups; i++) { + globaldata->Groups.push_back(tmap->namesOfGroups[i]); + } } } //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3; numComp = 0; - int n = 1; - for (int i=1; iGroups.size() != 0) { - groupComb.push_back(globaldata->Groups[i-1]+globaldata->Groups[l]); - }else { - groupComb.push_back(tmap->namesOfGroups[i-1]+tmap->namesOfGroups[l]); - } + groupComb.push_back(globaldata->Groups[i]+globaldata->Groups[l]); } - n++; } } catch(exception& e) { diff --git a/unweighted.cpp b/unweighted.cpp index b224996..1ceca42 100644 --- a/unweighted.cpp +++ b/unweighted.cpp @@ -15,68 +15,358 @@ EstOutput Unweighted::getValues(Tree* t) { try { globaldata = GlobalData::getInstance(); - //clear out old values - data.resize(1,0); - - double UniqueBL=0.0000; //a branch length is unique if it's chidren are from the same group - double totalBL = 0.00; //all branch lengths - double UW = 0.00; //Unweighted Value = UniqueBL / totalBL; - + vector groups; + double UniqueBL; //a branch length is unique if it's chidren are from the same group + double totalBL; //all branch lengths + double UW; //Unweighted Value = UniqueBL / totalBL; map::iterator it; //iterator to traverse pgroups map copyIpcount; + + + //if the users enters no groups then give them the score of all groups + int numGroups = globaldata->Groups.size(); + + //calculate number of comparsions + int numComp = 0; + for (int r=0; rGroups[a]); groups.push_back(globaldata->Groups[l]); - for(int i=t->getNumLeaves();igetNumNodes();i++){ + for(int i=t->getNumLeaves();igetNumNodes();i++){ - int lc = t->tree[i].getLChild(); //lc = vector index of left child - int rc = t->tree[i].getRChild(); //rc = vector index of right child + int lc = t->tree[i].getLChild(); //lc = vector index of left child + int rc = t->tree[i].getRChild(); //rc = vector index of right child - /**********************************************************************/ - //This section adds in all lengths that are non leaf + /**********************************************************************/ + //This section adds in all lengths that are non leaf - copyIpcount = t->tree[i].pcount; - for (it = copyIpcount.begin(); it != copyIpcount.end(); it++) { - if (inUsersGroups(it->first, globaldata->Groups) != true) { copyIpcount.erase(it->first); } - } + copyIpcount = t->tree[i].pcount; + for (it = copyIpcount.begin(); it != copyIpcount.end(); it++) { + if (inUsersGroups(it->first, groups) != true) { copyIpcount.erase(it->first); } + } - //if i's children are from the same group then i's pcount size will be 1 - //if copyIpcount.size() = 0 they are from a branch that is entirely from a group the user doesn't want - if (copyIpcount.size() == 0) { } - else if ((t->tree[i].getBranchLength() != -1) && (copyIpcount.size() == 1)) { UniqueBL += t->tree[i].getBranchLength(); } + //if i's children are from the same group then i's pcount size will be 1 + //if copyIpcount.size() = 0 they are from a branch that is entirely from a group the user doesn't want + if (copyIpcount.size() == 0) { } + else if ((t->tree[i].getBranchLength() != -1) && (copyIpcount.size() == 1)) { UniqueBL += t->tree[i].getBranchLength(); } - //add i's BL to total if it is from the groups the user wants - if ((t->tree[i].getBranchLength() != -1) && (copyIpcount.size() != 0)) { - totalBL += t->tree[i].getBranchLength(); - } + //add i's BL to total if it is from the groups the user wants + if ((t->tree[i].getBranchLength() != -1) && (copyIpcount.size() != 0)) { + totalBL += t->tree[i].getBranchLength(); + } - /**********************************************************************/ - //This section adds in all lengths that are leaf + /**********************************************************************/ + //This section adds in all lengths that are leaf - //if i's chidren are leaves - if (t->tree[rc].getRChild() == -1) { - //if rc is a valid group and rc has a BL - if ((inUsersGroups(t->tree[rc].getGroup(), globaldata->Groups) == true) && (t->tree[rc].getBranchLength() != -1)) { - UniqueBL += t->tree[rc].getBranchLength(); - totalBL += t->tree[rc].getBranchLength(); + //if i's chidren are leaves + if (t->tree[rc].getRChild() == -1) { + //if rc is a valid group and rc has a BL + if ((inUsersGroups(t->tree[rc].getGroup(), globaldata->Groups) == true) && (t->tree[rc].getBranchLength() != -1)) { + UniqueBL += t->tree[rc].getBranchLength(); + totalBL += t->tree[rc].getBranchLength(); + } + } + + if (t->tree[lc].getLChild() == -1) { + //if lc is a valid group and lc has a BL + if ((inUsersGroups(t->tree[lc].getGroup(), globaldata->Groups) == true) && (t->tree[lc].getBranchLength() != -1)) { + UniqueBL += t->tree[lc].getBranchLength(); + totalBL += t->tree[lc].getBranchLength(); + } + } + + /**********************************************************************/ } + + UW = (UniqueBL / totalBL); + + if (isnan(UW) || isinf(UW)) { UW = 0; } + + data[count] = UW; + count++; + groups.clear(); } - - if (t->tree[lc].getLChild() == -1) { - //if lc is a valid group and lc has a BL - if ((inUsersGroups(t->tree[lc].getGroup(), globaldata->Groups) == true) && (t->tree[lc].getBranchLength() != -1)) { - UniqueBL += t->tree[lc].getBranchLength(); - totalBL += t->tree[lc].getBranchLength(); + } + + + if (numComp != 1) { + if (numGroups == 0) { + //get score for all users groups + for (int i = 0; i < tmap->namesOfGroups.size(); i++) { + groups.push_back(tmap->namesOfGroups[i]); + } + }else { + for (int i = 0; i < globaldata->Groups.size(); i++) { + groups.push_back(globaldata->Groups[i]); } } + + UniqueBL=0.0000; //a branch length is unique if it's chidren are from the same group + totalBL = 0.00; //all branch lengths + UW = 0.00; //Unweighted Value = UniqueBL / totalBL; + copyIpcount.clear(); + + for(int i=t->getNumLeaves();igetNumNodes();i++){ + + int lc = t->tree[i].getLChild(); //lc = vector index of left child + int rc = t->tree[i].getRChild(); //rc = vector index of right child + + /**********************************************************************/ + //This section adds in all lengths that are non leaf + + copyIpcount = t->tree[i].pcount; + for (it = copyIpcount.begin(); it != copyIpcount.end(); it++) { + if (inUsersGroups(it->first, groups) != true) { copyIpcount.erase(it->first); } + } + + //if i's children are from the same group then i's pcount size will be 1 + //if copyIpcount.size() = 0 they are from a branch that is entirely from a group the user doesn't want + if (copyIpcount.size() == 0) { } + else if ((t->tree[i].getBranchLength() != -1) && (copyIpcount.size() == 1)) { UniqueBL += t->tree[i].getBranchLength(); } - /**********************************************************************/ + //add i's BL to total if it is from the groups the user wants + if ((t->tree[i].getBranchLength() != -1) && (copyIpcount.size() != 0)) { + totalBL += t->tree[i].getBranchLength(); + } + + /**********************************************************************/ + //This section adds in all lengths that are leaf + + //if i's chidren are leaves + if (t->tree[rc].getRChild() == -1) { + //if rc is a valid group and rc has a BL + if ((inUsersGroups(t->tree[rc].getGroup(), globaldata->Groups) == true) && (t->tree[rc].getBranchLength() != -1)) { + UniqueBL += t->tree[rc].getBranchLength(); + totalBL += t->tree[rc].getBranchLength(); + } + } + + if (t->tree[lc].getLChild() == -1) { + //if lc is a valid group and lc has a BL + if ((inUsersGroups(t->tree[lc].getGroup(), globaldata->Groups) == true) && (t->tree[lc].getBranchLength() != -1)) { + UniqueBL += t->tree[lc].getBranchLength(); + totalBL += t->tree[lc].getBranchLength(); + } + } + + /**********************************************************************/ + } + + UW = (UniqueBL / totalBL); + + if (isnan(UW) || isinf(UW)) { UW = 0; } + + data[count] = UW; } + + return data; + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the Unweighted class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the Unweighted class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + +} + +/**************************************************************************************************/ + +EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) { + try { + globaldata = GlobalData::getInstance(); + + vector groups; + double UniqueBL; //a branch length is unique if it's chidren are from the same group + double totalBL; //all branch lengths + double UW; //Unweighted Value = UniqueBL / totalBL; + map::iterator it; //iterator to traverse pgroups + map copyIpcount; + copyTree = new Tree; + + //if the users enters no groups then give them the score of all groups + int numGroups = globaldata->Groups.size(); + + //calculate number of comparsions + int numComp = 0; + for (int r=0; rgetCopy(t); + + //swap labels in the groups you want to compare + copyTree->assembleRandomUnifracTree(globaldata->Groups[a], globaldata->Groups[l]); + + //groups in this combo + groups.push_back(globaldata->Groups[a]); groups.push_back(globaldata->Groups[l]); + + for(int i=t->getNumLeaves();igetNumNodes();i++){ - UW = (UniqueBL / totalBL); + int lc = t->tree[i].getLChild(); //lc = vector index of left child + int rc = t->tree[i].getRChild(); //rc = vector index of right child + + /**********************************************************************/ + //This section adds in all lengths that are non leaf + + copyIpcount = t->tree[i].pcount; + for (it = copyIpcount.begin(); it != copyIpcount.end(); it++) { + if (inUsersGroups(it->first, groups) != true) { copyIpcount.erase(it->first); } + } + + //if i's children are from the same group then i's pcount size will be 1 + //if copyIpcount.size() = 0 they are from a branch that is entirely from a group the user doesn't want + if (copyIpcount.size() == 0) { } + else if ((t->tree[i].getBranchLength() != -1) && (copyIpcount.size() == 1)) { UniqueBL += t->tree[i].getBranchLength(); } + + //add i's BL to total if it is from the groups the user wants + if ((t->tree[i].getBranchLength() != -1) && (copyIpcount.size() != 0)) { + totalBL += t->tree[i].getBranchLength(); + } + + /**********************************************************************/ + //This section adds in all lengths that are leaf + + //if i's chidren are leaves + if (t->tree[rc].getRChild() == -1) { + //if rc is a valid group and rc has a BL + if ((inUsersGroups(t->tree[rc].getGroup(), globaldata->Groups) == true) && (t->tree[rc].getBranchLength() != -1)) { + UniqueBL += t->tree[rc].getBranchLength(); + totalBL += t->tree[rc].getBranchLength(); + } + } + + if (t->tree[lc].getLChild() == -1) { + //if lc is a valid group and lc has a BL + if ((inUsersGroups(t->tree[lc].getGroup(), globaldata->Groups) == true) && (t->tree[lc].getBranchLength() != -1)) { + UniqueBL += t->tree[lc].getBranchLength(); + totalBL += t->tree[lc].getBranchLength(); + } + } + + /**********************************************************************/ + } + + UW = (UniqueBL / totalBL); - if (isnan(UW) || isinf(UW)) { UW = 0; } + if (isnan(UW) || isinf(UW)) { UW = 0; } - data[0] = UW; + data[count] = UW; + count++; + groups.clear(); + } + } + + if (numComp != 1) { + if (numGroups == 0) { + //get score for all users groups + for (int i = 0; i < tmap->namesOfGroups.size(); i++) { + groups.push_back(tmap->namesOfGroups[i]); + } + }else { + for (int i = 0; i < globaldata->Groups.size(); i++) { + groups.push_back(globaldata->Groups[i]); + } + } + + UniqueBL=0.0000; //a branch length is unique if it's chidren are from the same group + totalBL = 0.00; //all branch lengths + UW = 0.00; //Unweighted Value = UniqueBL / totalBL; + copyIpcount.clear(); + + //copy random tree passed in + copyTree->getCopy(t); + + //swap labels in all the groups you want to compare + copyTree->assembleRandomUnifracTree(); + + for(int i=t->getNumLeaves();igetNumNodes();i++){ + + int lc = t->tree[i].getLChild(); //lc = vector index of left child + int rc = t->tree[i].getRChild(); //rc = vector index of right child + + /**********************************************************************/ + //This section adds in all lengths that are non leaf + + copyIpcount = t->tree[i].pcount; + for (it = copyIpcount.begin(); it != copyIpcount.end(); it++) { + if (inUsersGroups(it->first, groups) != true) { copyIpcount.erase(it->first); } + } + + //if i's children are from the same group then i's pcount size will be 1 + //if copyIpcount.size() = 0 they are from a branch that is entirely from a group the user doesn't want + if (copyIpcount.size() == 0) { } + else if ((t->tree[i].getBranchLength() != -1) && (copyIpcount.size() == 1)) { UniqueBL += t->tree[i].getBranchLength(); } + + //add i's BL to total if it is from the groups the user wants + if ((t->tree[i].getBranchLength() != -1) && (copyIpcount.size() != 0)) { + totalBL += t->tree[i].getBranchLength(); + } + + /**********************************************************************/ + //This section adds in all lengths that are leaf + + //if i's chidren are leaves + if (t->tree[rc].getRChild() == -1) { + //if rc is a valid group and rc has a BL + if ((inUsersGroups(t->tree[rc].getGroup(), globaldata->Groups) == true) && (t->tree[rc].getBranchLength() != -1)) { + UniqueBL += t->tree[rc].getBranchLength(); + totalBL += t->tree[rc].getBranchLength(); + } + } + + if (t->tree[lc].getLChild() == -1) { + //if lc is a valid group and lc has a BL + if ((inUsersGroups(t->tree[lc].getGroup(), globaldata->Groups) == true) && (t->tree[lc].getBranchLength() != -1)) { + UniqueBL += t->tree[lc].getBranchLength(); + totalBL += t->tree[lc].getBranchLength(); + } + } + + /**********************************************************************/ + } + + UW = (UniqueBL / totalBL); + + if (isnan(UW) || isinf(UW)) { UW = 0; } + + data[count] = UW; + } + return data; } @@ -88,6 +378,8 @@ EstOutput Unweighted::getValues(Tree* t) { cout << "An unknown error has occurred in the Unweighted class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } - } + + + diff --git a/unweighted.h b/unweighted.h index b24ad22..8751469 100644 --- a/unweighted.h +++ b/unweighted.h @@ -22,10 +22,11 @@ class Unweighted : public TreeCalculator { Unweighted(TreeMap* t) : tmap(t) {}; ~Unweighted() {}; EstOutput getValues(Tree*); - EstOutput getValues(Tree*, string, string) { return data; }; + EstOutput getValues(Tree*, string, string); private: GlobalData* globaldata; + Tree* copyTree; EstOutput data; TreeMap* tmap; diff --git a/weighted.cpp b/weighted.cpp index 915cf36..fa22716 100644 --- a/weighted.cpp +++ b/weighted.cpp @@ -17,24 +17,14 @@ EstOutput Weighted::getValues(Tree* t) { int numGroups; vector D; - //if the user has not entered specific groups to analyze then do them all - if (globaldata->Groups.size() == 0) { - numGroups = tmap->getNumGroups(); - }else { - numGroups = globaldata->Groups.size(); - } + numGroups = globaldata->Groups.size(); //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3; - int n = 1; int count = 0; - for (int i=1; iGroups.size() == 0) { - WScore[tmap->namesOfGroups[i-1]+tmap->namesOfGroups[l]] = 0.0; - }else { - WScore[globaldata->Groups[i-1]+globaldata->Groups[l]] = 0.0; - } + WScore[globaldata->Groups[i]+globaldata->Groups[l]] = 0.0; D.push_back(0.0000); //initialize a spot in D for each combination @@ -59,106 +49,62 @@ EstOutput Weighted::getValues(Tree* t) { sum += t->tree[index].getBranchLength(); } - if (globaldata->Groups.size() == 0) { - //is this sum from a sequence which is in one of the users groups - if (inUsersGroups(t->tree[v].getGroup(), tmap->namesOfGroups) == true) { - //is this sum from a sequence which is in this groupCombo - if ((t->tree[v].getGroup() == tmap->namesOfGroups[i-1]) || (t->tree[v].getGroup() == tmap->namesOfGroups[l])) { - sum /= (double)tmap->seqsPerGroup[t->tree[v].getGroup()]; - D[count] += sum; - } - } - }else { - //is this sum from a sequence which is in one of the users groups - if (inUsersGroups(t->tree[v].getGroup(), globaldata->Groups) == true) { - //is this sum from a sequence which is in this groupCombo - if ((t->tree[v].getGroup() == globaldata->Groups[i-1]) || (t->tree[v].getGroup() == globaldata->Groups[l])) { - sum /= (double)tmap->seqsPerGroup[t->tree[v].getGroup()]; - D[count] += sum; - } + //is this sum from a sequence which is in one of the users groups + if (inUsersGroups(t->tree[v].getGroup(), globaldata->Groups) == true) { + //is this sum from a sequence which is in this groupCombo + if ((t->tree[v].getGroup() == globaldata->Groups[i]) || (t->tree[v].getGroup() == globaldata->Groups[l])) { + sum /= (double)tmap->seqsPerGroup[t->tree[v].getGroup()]; + D[count] += sum; } } } /*********************************************************/ count++; } - n++; } data.clear(); //clear out old values for(int i=0;igetNumNodes();i++){ //calculate weighted score for each of the group comb i.e. with groups A,B,C = AB, AC, BC. - n = 1; - for (int b=1; bGroups.size() == 0) { - //does this node have descendants from group b-1 - it = t->tree[i].pcount.find(tmap->namesOfGroups[b-1]); - //if it does u = # of its descendants with a certain group / total number in tree with a certain group - if (it != t->tree[i].pcount.end()) { - u = (double) t->tree[i].pcount[tmap->namesOfGroups[b-1]] / (double) tmap->seqsPerGroup[tmap->namesOfGroups[b-1]]; - }else { u = 0.00; } + //does this node have descendants from group b-1 + it = t->tree[i].pcount.find(globaldata->Groups[b]); + //if it does u = # of its descendants with a certain group / total number in tree with a certain group + if (it != t->tree[i].pcount.end()) { + u = (double) t->tree[i].pcount[globaldata->Groups[b]] / (double) tmap->seqsPerGroup[globaldata->Groups[b]]; + }else { u = 0.00; } - //does this node have descendants from group l - it = t->tree[i].pcount.find(tmap->namesOfGroups[l]); - //if it does subtract their percentage from u - if (it != t->tree[i].pcount.end()) { - u -= (double) t->tree[i].pcount[tmap->namesOfGroups[l]] / (double) tmap->seqsPerGroup[tmap->namesOfGroups[l]]; - } - - u = abs(u) * t->tree[i].getBranchLength(); - - //save groupcombs u value - WScore[tmap->namesOfGroups[b-1]+tmap->namesOfGroups[l]] += u; - - //the user has entered specific groups - }else { - //does this node have descendants from group b-1 - it = t->tree[i].pcount.find(globaldata->Groups[b-1]); - //if it does u = # of its descendants with a certain group / total number in tree with a certain group - if (it != t->tree[i].pcount.end()) { - u = (double) t->tree[i].pcount[globaldata->Groups[b-1]] / (double) tmap->seqsPerGroup[globaldata->Groups[b-1]]; - }else { u = 0.00; } - - //does this node have descendants from group l - it = t->tree[i].pcount.find(globaldata->Groups[l]); - //if it does subtract their percentage from u - if (it != t->tree[i].pcount.end()) { - u -= (double) t->tree[i].pcount[globaldata->Groups[l]] / (double) tmap->seqsPerGroup[globaldata->Groups[l]]; - } + //does this node have descendants from group l + it = t->tree[i].pcount.find(globaldata->Groups[l]); + //if it does subtract their percentage from u + if (it != t->tree[i].pcount.end()) { + u -= (double) t->tree[i].pcount[globaldata->Groups[l]] / (double) tmap->seqsPerGroup[globaldata->Groups[l]]; + } - u = abs(u) * t->tree[i].getBranchLength(); + u = abs(u) * t->tree[i].getBranchLength(); - //save groupcombs u value - WScore[globaldata->Groups[b-1]+globaldata->Groups[l]] += u; - } + //save groupcombs u value + WScore[globaldata->Groups[b]+globaldata->Groups[l]] += u; /*********************************************************/ } - n++; } } //calculate weighted score for each group combination double UN; - n = 1; count = 0; - for (int i=1; iGroups.size() == 0) { - UN = (WScore[tmap->namesOfGroups[i-1]+tmap->namesOfGroups[l]] / D[count]); - }else {//they have entered specific groups - UN = (WScore[globaldata->Groups[i-1]+globaldata->Groups[l]] / D[count]); - } + for (int i=0; iGroups[i]+globaldata->Groups[l]] / D[count]); + if (isnan(UN) || isinf(UN)) { UN = 0; } data.push_back(UN); count++; } - n++; } return data; }