From 3c5c4e255ee8c36feb9e97aebc4e792e6ff8c440 Mon Sep 17 00:00:00 2001 From: westcott Date: Tue, 24 Feb 2009 16:34:41 +0000 Subject: [PATCH] changed unifrac.weighted() --- globaldata.cpp | 6 +- globaldata.hpp | 2 + parsimony.cpp | 6 +- parsimonycommand.cpp | 39 ++++++------- parsimonycommand.h | 4 +- unifracweightedcommand.cpp | 116 ++++++++++++++++++++----------------- unifracweightedcommand.h | 7 ++- weighted.cpp | 92 ++++++++++++++++------------- 8 files changed, 146 insertions(+), 126 deletions(-) diff --git a/globaldata.cpp b/globaldata.cpp index 66a44f2..015293d 100644 --- a/globaldata.cpp +++ b/globaldata.cpp @@ -110,7 +110,6 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ if (key == "fileroot" ) { fileroot = value; } if (key == "abund" ) { abund = value; } if (key == "random" ) { randomtree = value; } - if (key == "groups" ) { groups = value; } if (key == "calc") { calc = value; } @@ -160,7 +159,6 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ if (key == "fileroot" ) { fileroot = value; } if (key == "abund" ) { abund = value; } if (key == "random" ) { randomtree = value; } - if (key == "groups" ) { groups = value; } if (key == "calc") { calc = value; } @@ -261,6 +259,7 @@ string GlobalData::getJumble() { return jumble; } string GlobalData::getFreq() { return freq; } string GlobalData::getAbund() { return abund; } string GlobalData::getRandomTree() { return randomtree; } +string GlobalData::getGroups() { return groups; } void GlobalData::setListFile(string file) { listfile = file; inputFileName = file;} void GlobalData::setRabundFile(string file) { rabundfile = file; inputFileName = file;} void GlobalData::setSabundFile(string file) { sabundfile = file; inputFileName = file;} @@ -269,7 +268,8 @@ void GlobalData::setColumnFile(string file) { columnfile = file; inputFileNam void GlobalData::setNameFile(string file) { namefile = file; } void GlobalData::setFormat(string Format) { format = Format; } void GlobalData::setRandomTree(string Random) { randomtree = Random; } -void GlobalData::setCalc(string Calc) { calc = Calc; } +void GlobalData::setGroups(string g) { groups = g; } +void GlobalData::setCalc(string Calc) { calc = Calc; } /*******************************************************/ diff --git a/globaldata.hpp b/globaldata.hpp index 1bccc34..309ab4a 100644 --- a/globaldata.hpp +++ b/globaldata.hpp @@ -57,6 +57,7 @@ public: string getFreq(); string getAbund(); string getRandomTree(); + string getGroups(); void setListFile(string); void setPhylipFile(string); @@ -66,6 +67,7 @@ public: void setSabundFile(string); void setFormat(string); void setRandomTree(string); + void setGroups(string); void setCalc(string); diff --git a/parsimony.cpp b/parsimony.cpp index 93efe28..da8412c 100644 --- a/parsimony.cpp +++ b/parsimony.cpp @@ -22,7 +22,7 @@ EstOutput Parsimony::getValues(Tree* t) { for(int i=t->getNumLeaves();igetNumNodes();i++){ t->tree[i].pGroups = (t->mergeUserGroups(i)); } - //hjkl + for(int i=t->getNumLeaves();igetNumNodes();i++){ int lc = t->tree[i].getLChild(); int rc = t->tree[i].getRChild(); @@ -42,8 +42,8 @@ EstOutput Parsimony::getValues(Tree* t) { t->tree[i].printNode(); } - string hold; - cin >> hold; + //string hold; + //cin >> hold; data[0] = score; diff --git a/parsimonycommand.cpp b/parsimonycommand.cpp index 59dca88..1a493bd 100644 --- a/parsimonycommand.cpp +++ b/parsimonycommand.cpp @@ -25,8 +25,6 @@ ParsimonyCommand::ParsimonyCommand() { openOutputFile(parsFile, out); sumFile = globaldata->getTreeFile() + ".psummary"; openOutputFile(sumFile, outSum); - distFile = globaldata->getTreeFile() + ".pdistrib"; - openOutputFile(distFile, outDist); //set users groups to analyze setGroups(); }else { //user wants random distribution @@ -57,10 +55,6 @@ int ParsimonyCommand::execute() { userData.resize(1,0); //data[0] = pscore. randomData.resize(1,0); //data[0] = pscore. - //format output - outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint); - outDist << "RandomTree#" << '\t' << "ParsScore" << endl; - if (randomtree == "") { copyUserTree = new Tree(); //get pscores for users trees @@ -104,9 +98,6 @@ int ParsimonyCommand::execute() { //add randoms score to validscores validScores[randomData[0]] = randomData[0]; - //output info to pdistrib file - outDist << j+1 << '\t'<< '\t' << randomData[0] << endl; - delete randT; } }else { @@ -170,7 +161,7 @@ int ParsimonyCommand::execute() { //reset randomTree parameter to "" globaldata->setRandomTree(""); //reset groups parameter - globaldata->Groups.clear(); + globaldata->Groups.clear(); globaldata->setGroups(""); return 0; @@ -297,23 +288,29 @@ void ParsimonyCommand::setGroups() { try { //if the user has not entered specific groups to analyze then do them all if (globaldata->Groups.size() != 0) { - //check that groups are valid - for (int i = 0; i < globaldata->Groups.size(); i++) { - if (tmap->isValidGroup(globaldata->Groups[i]) != true) { - cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl; - // erase the invalid group from globaldata->Groups - globaldata->Groups.erase (globaldata->Groups.begin()+i); + if (globaldata->Groups[0] != "all") { + //check that groups are valid + for (int i = 0; i < globaldata->Groups.size(); i++) { + if (tmap->isValidGroup(globaldata->Groups[i]) != true) { + cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl; + // erase the invalid group from globaldata->Groups + globaldata->Groups.erase (globaldata->Groups.begin()+i); + } } - } - //if the user only entered invalid groups - if (globaldata->Groups.size() == 0) { - cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl; + //if the user only entered invalid groups + if (globaldata->Groups.size() == 0) { + cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl; + for (int i = 0; i < tmap->namesOfGroups.size(); i++) { + globaldata->Groups.push_back(tmap->namesOfGroups[i]); + } + } + }else{//user has enter "all" and wants the default groups for (int i = 0; i < tmap->namesOfGroups.size(); i++) { globaldata->Groups.push_back(tmap->namesOfGroups[i]); } + globaldata->setGroups(""); } - }else { for (int i = 0; i < tmap->namesOfGroups.size(); i++) { globaldata->Groups.push_back(tmap->namesOfGroups[i]); diff --git a/parsimonycommand.h b/parsimonycommand.h index 2881dc6..ba30d40 100644 --- a/parsimonycommand.h +++ b/parsimonycommand.h @@ -32,7 +32,7 @@ class ParsimonyCommand : public Command { TreeMap* tmap; TreeMap* savetmap; Parsimony* pars; - string parsFile, sumFile, distFile, randomtree; + string parsFile, sumFile, randomtree; int iters, numGroups; vector numEachGroup; //vector containing the number of sequences in each group the users wants for random distrib. vector userTreeScores; //scores for users trees @@ -47,7 +47,7 @@ class ParsimonyCommand : public Command { map::iterator it; map::iterator it2; - ofstream out, outSum, outDist; + ofstream out, outSum; void printParsimonyFile(); void printUSummaryFile(); diff --git a/unifracweightedcommand.cpp b/unifracweightedcommand.cpp index fef08ff..4ec2773 100644 --- a/unifracweightedcommand.cpp +++ b/unifracweightedcommand.cpp @@ -20,49 +20,8 @@ UnifracWeightedCommand::UnifracWeightedCommand() { openOutputFile(weightedFile, out); sumFile = globaldata->getTreeFile() + ".wsummary"; openOutputFile(sumFile, outSum); - distFile = globaldata->getTreeFile() + ".wdistrib"; - openOutputFile(distFile, outDist); - - //if the user has not entered specific groups to analyze then do them all - if (globaldata->Groups.size() == 0) { - numGroups = tmap->getNumGroups(); - }else { - //check that groups are valid - for (int i = 0; i < globaldata->Groups.size(); i++) { - if (tmap->isValidGroup(globaldata->Groups[i]) != true) { - cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl; - // erase the invalid group from globaldata->Groups - globaldata->Groups.erase (globaldata->Groups.begin()+i); - } - } - - //if the user only entered invalid groups - if (globaldata->Groups.size() == 0) { - numGroups = tmap->getNumGroups(); - cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl; - }else if (globaldata->Groups.size() == 1) { - cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl; - numGroups = tmap->getNumGroups(); - globaldata->Groups.clear(); - }else { numGroups = globaldata->Groups.size(); } - } - - //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3; - numComp = 0; - int n = 1; - for (int i=1; iGroups.size() != 0) { - groupComb.push_back(globaldata->Groups[i-1]+globaldata->Groups[l]); - }else { - groupComb.push_back(tmap->namesOfGroups[i-1]+tmap->namesOfGroups[l]); - } - } - n++; - } - + + setGroups(); //sets the groups the user wants to analyze convert(globaldata->getIters(), iters); //how many random trees to generate weighted = new Weighted(tmap); @@ -88,11 +47,6 @@ int UnifracWeightedCommand::execute() { totalrscoreFreq.resize(numComp); uCumul.resize(numComp); - //format output - outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint); - outDist << "Tree#" << '\t' << "Iter" << '\t' << "Groups"<< '\t' << "WScore" << endl; - - //create new tree with same num nodes and leaves as users randT = new Tree(); @@ -141,9 +95,6 @@ int UnifracWeightedCommand::execute() { //add random score to valid scores validScores[p][randomData[p]] = randomData[p]; - - //output info to uwdistrib file - outDist << i+1 << '\t' << '\t'<< j+1 << '\t' << '\t' << groupComb[p] << '\t'<< randomData[p] << endl; } } @@ -198,9 +149,6 @@ int UnifracWeightedCommand::execute() { printWeightedFile(); printWSummaryFile(); - //reset randomTree parameter to 0 - globaldata->setRandomTree("0"); - //clear out users groups globaldata->Groups.clear(); @@ -255,6 +203,7 @@ void UnifracWeightedCommand::printWSummaryFile() { try { //column headers outSum << "Tree#" << '\t' << "Groups" << '\t' << '\t' << "WScore" << '\t' << '\t' << "WSig" << endl; + cout << "Tree#" << '\t' << "Groups" << '\t' << '\t' << "WScore" << '\t' << '\t' << "WSig" << endl; //format output outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint); @@ -264,6 +213,7 @@ void UnifracWeightedCommand::printWSummaryFile() { for (int i = 0; i < T.size(); i++) { for (int j = 0; j < numComp; j++) { outSum << setprecision(6) << i+1 << '\t' << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << WScoreSig[count] << endl; + cout << setprecision(6) << i+1 << '\t' << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << WScoreSig[count] << endl; count++; } } @@ -307,3 +257,61 @@ void UnifracWeightedCommand::saveRandomScores() { } /***********************************************************/ +void UnifracWeightedCommand::setGroups() { + try { + //if the user has not entered specific groups to analyze then do them all + if (globaldata->Groups.size() == 0) { + numGroups = tmap->getNumGroups(); + }else { + if (globaldata->getGroups() != "all") { + //check that groups are valid + for (int i = 0; i < globaldata->Groups.size(); i++) { + if (tmap->isValidGroup(globaldata->Groups[i]) != true) { + cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl; + // erase the invalid group from globaldata->Groups + globaldata->Groups.erase (globaldata->Groups.begin()+i); + } + } + + //if the user only entered invalid groups + if (globaldata->Groups.size() == 0) { + numGroups = tmap->getNumGroups(); + cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl; + }else if (globaldata->Groups.size() == 1) { + cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl; + numGroups = tmap->getNumGroups(); + globaldata->Groups.clear(); + }else { numGroups = globaldata->Groups.size(); } + }else { //users wants all groups + numGroups = tmap->getNumGroups(); + globaldata->Groups.clear(); + globaldata->setGroups(""); + } + } + + //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3; + numComp = 0; + int n = 1; + for (int i=1; iGroups.size() != 0) { + groupComb.push_back(globaldata->Groups[i-1]+globaldata->Groups[l]); + }else { + groupComb.push_back(tmap->namesOfGroups[i-1]+tmap->namesOfGroups[l]); + } + } + n++; + } + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the UnifracWeightedCommand class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + diff --git a/unifracweightedcommand.h b/unifracweightedcommand.h index 1b8b12b..9deef73 100644 --- a/unifracweightedcommand.h +++ b/unifracweightedcommand.h @@ -34,7 +34,7 @@ class UnifracWeightedCommand : public Command { Tree* randT; //random tree TreeMap* tmap; Weighted* weighted; - string weightedFile, sumFile, distFile; + string weightedFile, sumFile; int iters, numGroups, numComp; EstOutput userData; //weighted score info for user tree EstOutput randomData; //weighted score info for random trees @@ -47,11 +47,12 @@ class UnifracWeightedCommand : public Command { map::iterator it; map::iterator it2; - ofstream outSum, outDist, out; + ofstream outSum, out; void printWSummaryFile(); void printWeightedFile(); - void saveRandomScores(); + void saveRandomScores(); + void setGroups(); }; diff --git a/weighted.cpp b/weighted.cpp index bcdee11..b8e8974 100644 --- a/weighted.cpp +++ b/weighted.cpp @@ -15,6 +15,7 @@ EstOutput Weighted::getValues(Tree* t) { try { globaldata = GlobalData::getInstance(); int numGroups; + vector D; //if the user has not entered specific groups to analyze then do them all if (globaldata->Groups.size() == 0) { @@ -30,56 +31,66 @@ EstOutput Weighted::getValues(Tree* t) { //initialize weighted scores if (globaldata->Groups.size() == 0) { WScore[tmap->namesOfGroups[i-1]+tmap->namesOfGroups[l]] = 0.0; + D.push_back(0.0000); //initialize a spot in D for each combination }else { WScore[globaldata->Groups[i-1]+globaldata->Groups[l]] = 0.0; + D.push_back(0.0000); //initialize a spot in D for each combination } } } - - data.clear(); //clear out old values - - double D = 0.0000; - - for(int i=0;igetNumLeaves();i++){ - int index = i; - double sum = 0.0000; - //while you aren't at root - while(t->tree[index].getParent() != -1){ - - if(t->tree[index].pGroups.size() != 0){ - sum += t->tree[index].getBranchLength(); - } - - //old_index = you - int old_index = index; - //index = your parent - index = t->tree[index].getParent(); - - //if you grandparent is the root - if(t->tree[index].getParent() == -1 && t->tree[old_index].pGroups.size() != 0){ - int lc = t->tree[t->tree[index].getLChild()].pGroups.size(); - int rc = t->tree[t->tree[index].getRChild()].pGroups.size(); - - - if(lc == 0 || rc == 0){ - sum -= t->tree[old_index].getBranchLength(); - } - } - } - - if(t->tree[i].getGroup() != ""){ - sum /= (double)tmap->seqsPerGroup[t->tree[i].getGroup()]; - D += sum; - } - } - + data.clear(); //clear out old values for(int i=0;igetNumNodes();i++){ //calculate weighted score for each of the group comb i.e. with groups A,B,C = AB, AC, BC. n = 1; for (int b=1; bgetNumLeaves();v++){ + int index = v; + double sum = 0.0000; + + //while you aren't at root + while(t->tree[index].getParent() != -1){ + + //if you have a BL + if(t->tree[index].getBranchLength() != -1){ + sum += t->tree[index].getBranchLength(); + } + + index = t->tree[index].getParent(); + } + + //get last breanch length added + if(t->tree[index].getBranchLength() != -1){ + sum += t->tree[index].getBranchLength(); + } + + if (globaldata->Groups.size() == 0) { + //is this sum from a sequence which is in one of the users groups + if (inUsersGroups(t->tree[v].getGroup(), tmap->namesOfGroups) == true) { + //is this sum from a sequence which is in this groupCombo + if ((t->tree[v].getGroup() == tmap->namesOfGroups[b-1]) || (t->tree[v].getGroup() == tmap->namesOfGroups[l])) { + sum /= (double)tmap->seqsPerGroup[t->tree[v].getGroup()]; + D[n-1] += sum; + } + } + }else { + //is this sum from a sequence which is in one of the users groups + if (inUsersGroups(t->tree[v].getGroup(), globaldata->Groups) == true) { + //is this sum from a sequence which is in this groupCombo + if ((t->tree[v].getGroup() == globaldata->Groups[b-1]) || (t->tree[v].getGroup() == globaldata->Groups[l])) { + sum /= (double)tmap->seqsPerGroup[t->tree[v].getGroup()]; + D[n-1] += sum; + } + } + } + } + /*********************************************************/ + //calculate a u value for each combo double u; //the user has not entered specific groups if (globaldata->Groups.size() == 0) { @@ -123,6 +134,7 @@ EstOutput Weighted::getValues(Tree* t) { //save groupcombs u value WScore[globaldata->Groups[b-1]+globaldata->Groups[l]] += u; } + /*********************************************************/ } n++; } @@ -135,9 +147,9 @@ EstOutput Weighted::getValues(Tree* t) { for (int l = n; l < numGroups; l++) { //the user has not entered specific groups if (globaldata->Groups.size() == 0) { - UN = (WScore[tmap->namesOfGroups[i-1]+tmap->namesOfGroups[l]] / D); + UN = (WScore[tmap->namesOfGroups[i-1]+tmap->namesOfGroups[l]] / D[n-1]); }else {//they have entered specific groups - UN = (WScore[globaldata->Groups[i-1]+globaldata->Groups[l]] / D); + UN = (WScore[globaldata->Groups[i-1]+globaldata->Groups[l]] / D[n-1]); } if (isnan(UN) || isinf(UN)) { UN = 0; } data.push_back(UN); -- 2.39.2