From: westcott Date: Fri, 25 Feb 2011 11:13:46 +0000 (+0000) Subject: added root parameter to the unifrac commands so you can choose to include the entire... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=fca3f55d5ded10c3dc77856f3cc4a1c53b02bb6f added root parameter to the unifrac commands so you can choose to include the entire root or stop the calc at the "root" of the comparison. --- diff --git a/unifracunweightedcommand.cpp b/unifracunweightedcommand.cpp index 63487e0..2e90626 100644 --- a/unifracunweightedcommand.cpp +++ b/unifracunweightedcommand.cpp @@ -12,7 +12,7 @@ //********************************************************************************************************************** vector UnifracUnweightedCommand::getValidParameters(){ try { - string Array[] = {"groups","iters","distance","random", "processors","outputdir","inputdir"}; + string Array[] = {"groups","iters","distance","random","root", "processors","outputdir","inputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); return myArray; } @@ -73,7 +73,7 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option) { else { //valid paramters for this command - string Array[] = {"groups","iters","distance","random", "processors","outputdir","inputdir"}; + string Array[] = {"groups","iters","distance","random","root", "processors","outputdir","inputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); @@ -124,6 +124,9 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option) { temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "f"; } random = m->isTrue(temp); + temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; } + includeRoot = m->isTrue(temp); + temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; } convert(temp, processors); @@ -149,7 +152,7 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option) { if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); } - unweighted = new Unweighted(tmap); + unweighted = new Unweighted(tmap, includeRoot); } @@ -167,11 +170,12 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option) { void UnifracUnweightedCommand::help(){ try { m->mothurOut("The unifrac.unweighted command can only be executed after a successful read.tree command.\n"); - m->mothurOut("The unifrac.unweighted command parameters are groups, iters, distance, processors and random. No parameters are required.\n"); + m->mothurOut("The unifrac.unweighted command parameters are groups, iters, distance, processors, root and random. No parameters are required.\n"); m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n"); m->mothurOut("The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n"); m->mothurOut("The distance parameter allows you to create a distance file from the results. The default is false. You may set distance to lt, square or column.\n"); m->mothurOut("The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n"); + m->mothurOut("The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n"); m->mothurOut("The processors parameter allows you to specify the number of processors to use. The default is 1.\n"); m->mothurOut("The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n"); m->mothurOut("Example unifrac.unweighted(groups=A-B-C, iters=500).\n"); diff --git a/unifracunweightedcommand.h b/unifracunweightedcommand.h index a1f2bf3..cd46b62 100644 --- a/unifracunweightedcommand.h +++ b/unifracunweightedcommand.h @@ -50,7 +50,7 @@ class UnifracUnweightedCommand : public Command { vector< map > rscoreFreq; //map -vector entry for each combination. vector< map > rCumul; //map -vector entry for each combination. - bool abort, phylip, random; + bool abort, phylip, random, includeRoot; string groups, itersString, outputDir, outputForm; vector Groups, outputNames; //holds groups to be used map > outputTypes; diff --git a/unifracweightedcommand.cpp b/unifracweightedcommand.cpp index 1fa1a28..0ccdf2f 100644 --- a/unifracweightedcommand.cpp +++ b/unifracweightedcommand.cpp @@ -12,7 +12,7 @@ //********************************************************************************************************************** vector UnifracWeightedCommand::getValidParameters(){ try { - string Array[] = {"groups","iters","distance","random","processors","outputdir","inputdir"}; + string Array[] = {"groups","iters","distance","random","processors","root","outputdir","inputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); return myArray; } @@ -72,7 +72,7 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) { else { //valid paramters for this command - string Array[] = {"groups","iters","distance","random","processors","outputdir","inputdir"}; + string Array[] = {"groups","iters","distance","random","processors","root","outputdir","inputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); @@ -120,9 +120,12 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) { else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; } } - temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "F"; } + temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "F"; } random = m->isTrue(temp); + temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; } + includeRoot = m->isTrue(temp); + temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; } convert(temp, processors); @@ -141,7 +144,7 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) { util->setGroups(globaldata->Groups, tmap->namesOfGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze util->getCombos(groupComb, globaldata->Groups, numComp); - weighted = new Weighted(tmap); + weighted = new Weighted(tmap, includeRoot); } } @@ -158,11 +161,12 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) { void UnifracWeightedCommand::help(){ try { m->mothurOut("The unifrac.weighted command can only be executed after a successful read.tree command.\n"); - m->mothurOut("The unifrac.weighted command parameters are groups, iters, distance, processors and random. No parameters are required.\n"); + m->mothurOut("The unifrac.weighted command parameters are groups, iters, distance, processors, root and random. No parameters are required.\n"); m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n"); m->mothurOut("The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n"); m->mothurOut("The distance parameter allows you to create a distance file from the results. The default is false.\n"); m->mothurOut("The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n"); + m->mothurOut("The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n"); m->mothurOut("The processors parameter allows you to specify the number of processors to use. The default is 1.\n"); m->mothurOut("The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n"); m->mothurOut("Example unifrac.weighted(groups=A-B-C, iters=500).\n"); diff --git a/unifracweightedcommand.h b/unifracweightedcommand.h index 5bb690a..2eee7c4 100644 --- a/unifracweightedcommand.h +++ b/unifracweightedcommand.h @@ -60,7 +60,7 @@ class UnifracWeightedCommand : public Command { vector< map > rCumul; //map -vector entry for each c map validScores; //map contains scores from random - bool abort, phylip, random; + bool abort, phylip, random, includeRoot; string groups, itersString, outputForm; vector Groups, outputNames; //holds groups to be used map > outputTypes; diff --git a/unweighted.cpp b/unweighted.cpp index f0c7c40..08e83ec 100644 --- a/unweighted.cpp +++ b/unweighted.cpp @@ -199,7 +199,8 @@ EstOutput Unweighted::driver(Tree* t, vector< vector > namesOfGroupCombo m->mothurOut(namesOfGroupCombos[h][namesOfGroupCombos[h].size()-1]); m->mothurOut(", skipping."); m->mothurOutEndLine(); results[count] = UW; }else{ - + + //if including the root this clears rootForGrouping[namesOfGroupCombos[h]] getRoot(t, nodeBelonging, namesOfGroupCombos[h]); for(int i=0;igetNumNodes();i++){ @@ -215,19 +216,19 @@ EstOutput Unweighted::driver(Tree* t, vector< vector > namesOfGroupCombo map::iterator itGroup = t->tree[i].pcount.find(namesOfGroupCombos[h][j]); if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } } - + + //unique calc if (pcountSize == 0) { } else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a unique branch length and you are not the root UniqueBL += abs(t->tree[i].getBranchLength()); } - + //total calc if (pcountSize == 0) { } else if ((t->tree[i].getBranchLength() != -1) && (pcountSize != 0) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a branch length and you are not the root totalBL += abs(t->tree[i].getBranchLength()); } - } //cout << UniqueBL << '\t' << totalBL << endl; UW = (UniqueBL / totalBL); @@ -442,6 +443,7 @@ EstOutput Unweighted::driver(Tree* t, vector< vector > namesOfGroupCombo m->mothurOut(", skipping."); m->mothurOutEndLine(); results[count] = UW; }else{ + //if including the root this clears rootForGrouping[namesOfGroupCombos[h]] getRoot(copyTree, nodeBelonging, namesOfGroupCombos[h]); for(int i=0;igetNumNodes();i++){ @@ -497,47 +499,51 @@ int Unweighted::getRoot(Tree* t, int v, vector grouping) { //you are a leaf so get your parent int index = t->tree[index].getParent(); - //my parent is a potential root - rootForGrouping[grouping].insert(index); - - //while you aren't at root - while(t->tree[index].getParent() != -1){ - - if (m->control_pressed) { return 0; } - - //am I the root for this grouping? if so I want to stop "early" - //does my sibling have descendants from the users groups? - //if so I am not the root - int parent = t->tree[index].getParent(); - int lc = t->tree[parent].getLChild(); - int rc = t->tree[parent].getRChild(); - - int sib = lc; - if (lc == index) { sib = rc; } + if (includeRoot) { + rootForGrouping[grouping].clear(); + }else { + //my parent is a potential root + rootForGrouping[grouping].insert(index); - map::iterator itGroup; - int pcountSize = 0; - for (int j = 0; j < grouping.size(); j++) { - map::iterator itGroup = t->tree[sib].pcount.find(grouping[j]); - if (itGroup != t->tree[sib].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } + //while you aren't at root + while(t->tree[index].getParent() != -1){ + + if (m->control_pressed) { return 0; } + + //am I the root for this grouping? if so I want to stop "early" + //does my sibling have descendants from the users groups? + //if so I am not the root + int parent = t->tree[index].getParent(); + int lc = t->tree[parent].getLChild(); + int rc = t->tree[parent].getRChild(); + + int sib = lc; + if (lc == index) { sib = rc; } + + map::iterator itGroup; + int pcountSize = 0; + for (int j = 0; j < grouping.size(); j++) { + map::iterator itGroup = t->tree[sib].pcount.find(grouping[j]); + if (itGroup != t->tree[sib].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } + } + + //if yes, I am not the root + if (pcountSize != 0) { + rootForGrouping[grouping].clear(); + rootForGrouping[grouping].insert(parent); + } + + index = parent; } - //if yes, I am not the root - if (pcountSize != 0) { - rootForGrouping[grouping].clear(); + //get all nodes above the root to add so we don't add their u values above + index = *(rootForGrouping[grouping].begin()); + while(t->tree[index].getParent() != -1){ + int parent = t->tree[index].getParent(); rootForGrouping[grouping].insert(parent); + //cout << parent << " in root" << endl; + index = parent; } - - index = parent; - } - - //get all nodes above the root to add so we don't add their u values above - index = *(rootForGrouping[grouping].begin()); - while(t->tree[index].getParent() != -1){ - int parent = t->tree[index].getParent(); - rootForGrouping[grouping].insert(parent); - //cout << parent << " in root" << endl; - index = parent; } return 0; diff --git a/unweighted.h b/unweighted.h index 598af1a..0ccfab5 100644 --- a/unweighted.h +++ b/unweighted.h @@ -19,7 +19,7 @@ class Unweighted : public TreeCalculator { public: - Unweighted(TreeMap* t) : tmap(t) {}; + Unweighted(TreeMap* t, bool r) : tmap(t), includeRoot(r) {}; ~Unweighted() {}; EstOutput getValues(Tree*, int, string); EstOutput getValues(Tree*, string, string, int, string); @@ -38,6 +38,7 @@ class Unweighted : public TreeCalculator { int processors; string outputDir; map< vector, set > rootForGrouping; //maps a grouping combo to the roots for that combo + bool includeRoot; EstOutput driver(Tree*, vector< vector >, int, int); EstOutput createProcesses(Tree*, vector< vector >); diff --git a/weighted.cpp b/weighted.cpp index b158815..cae2b92 100644 --- a/weighted.cpp +++ b/weighted.cpp @@ -228,12 +228,19 @@ EstOutput Weighted::driver(Tree* t, vector< vector > namesOfGroupCombos, u -= (double) t->tree[i].pcount[groupB] / (double) tmap->seqsPerGroup[groupB]; } - //if this is not the root then add it - if (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0) { + if (includeRoot) { if (t->tree[i].getBranchLength() != -1) { u = abs(u * t->tree[i].getBranchLength()); WScore[(groupA+groupB)] += u; } + }else { + //if this is not the root then add it + if (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0) { + if (t->tree[i].getBranchLength() != -1) { + u = abs(u * t->tree[i].getBranchLength()); + WScore[(groupA+groupB)] += u; + } + } } } @@ -317,12 +324,19 @@ EstOutput Weighted::getValues(Tree* t, string groupA, string groupB) { u -= (double) t->tree[i].pcount[groupB] / (double) tmap->seqsPerGroup[groupB]; } - //if this is not the root then add it - if (rootForGrouping[groups].count(i) == 0) { + if (includeRoot) { if (t->tree[i].getBranchLength() != -1) { u = abs(u * t->tree[i].getBranchLength()); WScore[(groupA+groupB)] += u; } + }else{ + //if this is not the root then add it + if (rootForGrouping[groups].count(i) == 0) { + if (t->tree[i].getBranchLength() != -1) { + u = abs(u * t->tree[i].getBranchLength()); + WScore[(groupA+groupB)] += u; + } + } } } /********************************************************/ @@ -361,37 +375,43 @@ double Weighted::getLengthToRoot(Tree* t, int v, string groupA, string groupB) { while(t->tree[index].getParent() != -1){ if (m->control_pressed) { return sum; } - - //am I the root for this grouping? if so I want to stop "early" - //does my sibling have descendants from the users groups? + int parent = t->tree[index].getParent(); - int lc = t->tree[parent].getLChild(); - int rc = t->tree[parent].getRChild(); - int sib = lc; - if (lc == index) { sib = rc; } - - map::iterator itGroup; - int pcountSize = 0; - itGroup = t->tree[sib].pcount.find(groupA); - if (itGroup != t->tree[sib].pcount.end()) { pcountSize++; } - itGroup = t->tree[sib].pcount.find(groupB); - if (itGroup != t->tree[sib].pcount.end()) { pcountSize++; } - - //if yes, I am not the root so add me - if (pcountSize != 0) { - if (t->tree[index].getBranchLength() != -1) { - sum += abs(t->tree[index].getBranchLength()) + tempTotal; - tempTotal = 0.0; - }else { - sum += tempTotal; - tempTotal = 0.0; - } - rootForGrouping[grouping].clear(); - rootForGrouping[grouping].insert(parent); - }else { //if no, I may be the root so add my br to tempTotal until I am proven innocent - if (t->tree[index].getBranchLength() != -1) { - tempTotal += abs(t->tree[index].getBranchLength()); + if (includeRoot) { //add everyone + if(t->tree[index].getBranchLength() != -1){ sum += abs(t->tree[index].getBranchLength()); } + }else { + + //am I the root for this grouping? if so I want to stop "early" + //does my sibling have descendants from the users groups? + int lc = t->tree[parent].getLChild(); + int rc = t->tree[parent].getRChild(); + + int sib = lc; + if (lc == index) { sib = rc; } + + map::iterator itGroup; + int pcountSize = 0; + itGroup = t->tree[sib].pcount.find(groupA); + if (itGroup != t->tree[sib].pcount.end()) { pcountSize++; } + itGroup = t->tree[sib].pcount.find(groupB); + if (itGroup != t->tree[sib].pcount.end()) { pcountSize++; } + + //if yes, I am not the root so add me + if (pcountSize != 0) { + if (t->tree[index].getBranchLength() != -1) { + sum += abs(t->tree[index].getBranchLength()) + tempTotal; + tempTotal = 0.0; + }else { + sum += tempTotal; + tempTotal = 0.0; + } + rootForGrouping[grouping].clear(); + rootForGrouping[grouping].insert(parent); + }else { //if no, I may be the root so add my br to tempTotal until I am proven innocent + if (t->tree[index].getBranchLength() != -1) { + tempTotal += abs(t->tree[index].getBranchLength()); + } } } diff --git a/weighted.h b/weighted.h index 11cd26b..d78edf4 100644 --- a/weighted.h +++ b/weighted.h @@ -19,7 +19,7 @@ class Weighted : public TreeCalculator { public: - Weighted(TreeMap* t) : tmap(t) {}; + Weighted(TreeMap* t, bool r) : tmap(t), includeRoot(r) {}; ~Weighted() {}; EstOutput getValues(Tree*, string, string); @@ -41,6 +41,7 @@ class Weighted : public TreeCalculator { int processors; string outputDir; map< vector, set > rootForGrouping; //maps a grouping combo to the root for that combo + bool includeRoot; EstOutput driver(Tree*, vector< vector >, int, int); EstOutput createProcesses(Tree*, vector< vector >);