From 257808d42702d889a85d0132abc6b8776fc57418 Mon Sep 17 00:00:00 2001 From: westcott Date: Mon, 30 Aug 2010 15:14:18 +0000 Subject: [PATCH] sped up unifrac unweighted. --- sharedutilities.cpp | 1 + unifracunweightedcommand.cpp | 30 ++++---- unifracweightedcommand.cpp | 4 +- unweighted.cpp | 129 ++++++++++++++++------------------- 4 files changed, 76 insertions(+), 88 deletions(-) diff --git a/sharedutilities.cpp b/sharedutilities.cpp index 4339a25..6dffee9 100644 --- a/sharedutilities.cpp +++ b/sharedutilities.cpp @@ -226,6 +226,7 @@ void SharedUtil::setGroups(vector& userGroups, vector& allGroups } //rip extra - off allgroups label = label.substr(0, label.length()-1); + if ((mode != "weighted") && (allGroups.size() > 10)) { label = "merged"; } } if (mode == "weighted") { diff --git a/unifracunweightedcommand.cpp b/unifracunweightedcommand.cpp index 5fd26df..68c943c 100644 --- a/unifracunweightedcommand.cpp +++ b/unifracunweightedcommand.cpp @@ -58,7 +58,7 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option) { string temp = validParameter.validFile(parameters, "distance", false); if (temp == "not found") { temp = "false"; } phylip = m->isTrue(temp); - temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "true"; } + temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "f"; } random = m->isTrue(temp); if (!random) { iters = 0; } //turn off random calcs @@ -105,7 +105,7 @@ void UnifracUnweightedCommand::help(){ m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n"); m->mothurOut("The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n"); m->mothurOut("The distance parameter allows you to create a distance file from the results. The default is false.\n"); - m->mothurOut("The random parameter allows you to shut off the comparison to random trees. The default is true, meaning compare your trees with randomly generated trees.\n"); + m->mothurOut("The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n"); m->mothurOut("The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n"); m->mothurOut("Example unifrac.unweighted(groups=A-B-C, iters=500).\n"); m->mothurOut("The default value for groups is all the groups in your groupfile, and iters is 1000.\n"); @@ -199,21 +199,23 @@ int UnifracUnweightedCommand::execute() { validScores[randomData[k]] = randomData[k]; } } - + for(int a = 0; a < numComp; a++) { float rcumul = 1.0000; - //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print. - for (map::iterator it = validScores.begin(); it != validScores.end(); it++) { - //make rscoreFreq map and rCumul - map::iterator it2 = rscoreFreq[a].find(it->first); - rCumul[a][it->first] = rcumul; - //get percentage of random trees with that info - if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul-= it2->second; } - else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score - } - if (random) { UWScoreSig[a].push_back(rCumul[a][userData[a]]); } - else { UWScoreSig[a].push_back(0.0); } + if (random) { + //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print. + for (map::iterator it = validScores.begin(); it != validScores.end(); it++) { + //make rscoreFreq map and rCumul + map::iterator it2 = rscoreFreq[a].find(it->first); + rCumul[a][it->first] = rcumul; + //get percentage of random trees with that info + if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul-= it2->second; } + else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score + } + UWScoreSig[a].push_back(rCumul[a][userData[a]]); + }else { UWScoreSig[a].push_back(0.0); } + } diff --git a/unifracweightedcommand.cpp b/unifracweightedcommand.cpp index 9cda8f2..0201776 100644 --- a/unifracweightedcommand.cpp +++ b/unifracweightedcommand.cpp @@ -58,7 +58,7 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) { string temp = validParameter.validFile(parameters, "distance", false); if (temp == "not found") { temp = "false"; } phylip = m->isTrue(temp); - temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "true"; } + temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "F"; } random = m->isTrue(temp); if (!random) { iters = 0; } //turn off random calcs @@ -97,7 +97,7 @@ void UnifracWeightedCommand::help(){ m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n"); m->mothurOut("The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n"); m->mothurOut("The distance parameter allows you to create a distance file from the results. The default is false.\n"); - m->mothurOut("The random parameter allows you to shut off the comparison to random trees. The default is true, meaning compare your trees with randomly generated trees.\n"); + m->mothurOut("The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n"); m->mothurOut("The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n"); m->mothurOut("Example unifrac.weighted(groups=A-B-C, iters=500).\n"); m->mothurOut("The default value for groups is all the groups in your groupfile, and iters is 1000.\n"); diff --git a/unweighted.cpp b/unweighted.cpp index 1f6ad96..d9b3b58 100644 --- a/unweighted.cpp +++ b/unweighted.cpp @@ -19,9 +19,6 @@ EstOutput Unweighted::getValues(Tree* t) { double UniqueBL; //a branch length is unique if it's chidren are from the same group double totalBL; //all branch lengths double UW; //Unweighted Value = UniqueBL / totalBL; - map::iterator it; //iterator to traverse pgroups - map copyIpcount; - //if the users enters no groups then give them the score of all groups int numGroups = globaldata->Groups.size(); @@ -43,7 +40,7 @@ EstOutput Unweighted::getValues(Tree* t) { UniqueBL=0.0000; //a branch length is unique if it's chidren are from the same group totalBL = 0.00; //all branch lengths UW = 0.00; //Unweighted Value = UniqueBL / totalBL; - copyIpcount.clear(); + //copyIpcount.clear(); //groups in this combo groups.push_back(globaldata->Groups[a]); groups.push_back(globaldata->Groups[l]); @@ -51,27 +48,25 @@ EstOutput Unweighted::getValues(Tree* t) { for(int i=0;igetNumNodes();i++){ if (m->control_pressed) { return data; } - copyIpcount = t->tree[i].pcount; - for (it = copyIpcount.begin(); it != copyIpcount.end();) { - if (m->inUsersGroups(it->first, groups) != true) { - copyIpcount.erase(it++); - }else { it++; } + //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want + //pcountSize = 2, not unique to one group + //pcountSize = 1, unique to one group + + int pcountSize = 0; + for (int j = 0; j < groups.size(); j++) { + map::iterator itGroup = t->tree[i].pcount.find(groups[j]); + if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } } + + if (pcountSize == 0) { } + else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1)) { UniqueBL += abs(t->tree[i].getBranchLength()); } - //if i's children are from the same group then i's pcount size will be 1 - //if copyIpcount.size() = 0 they are from a branch that is entirely from a group the user doesn't want - if (copyIpcount.size() == 0) { } - else if ((t->tree[i].getBranchLength() != -1) && (copyIpcount.size() == 1)) { UniqueBL += abs(t->tree[i].getBranchLength()); } - - //add i's BL to total if it is from the groups the user wants - if ((t->tree[i].getBranchLength() != -1) && (copyIpcount.size() != 0)) { + if ((t->tree[i].getBranchLength() != -1) && (pcountSize != 0)) { totalBL += abs(t->tree[i].getBranchLength()); } - } UW = (UniqueBL / totalBL); - //cout << globaldata->Groups[a] << globaldata->Groups[l] << '\t' << UniqueBL << '\t' << totalBL << endl; if (isnan(UW) || isinf(UW)) { UW = 0; } @@ -99,29 +94,27 @@ EstOutput Unweighted::getValues(Tree* t) { UniqueBL=0.0000; //a branch length is unique if it's chidren are from the same group totalBL = 0.00; //all branch lengths UW = 0.00; //Unweighted Value = UniqueBL / totalBL; - copyIpcount.clear(); for(int i=0;igetNumNodes();i++){ if (m->control_pressed) { return data; } - copyIpcount = t->tree[i].pcount; - for (it = copyIpcount.begin(); it != copyIpcount.end();) { - if (m->inUsersGroups(it->first, groups) != true) { - copyIpcount.erase(it++); - }else { it++; } + //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want + //pcountSize = 2, not unique to one group + //pcountSize = 1, unique to one group + + int pcountSize = 0; + for (int j = 0; j < groups.size(); j++) { + map::iterator itGroup = t->tree[i].pcount.find(groups[j]); + if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } } - - //if i's children are from the same group then i's pcount size will be 1 - //if copyIpcount.size() = 0 they are from a branch that is entirely from a group the user doesn't want - if (copyIpcount.size() == 0) { } - else if ((t->tree[i].getBranchLength() != -1) && (copyIpcount.size() == 1)) { UniqueBL += abs(t->tree[i].getBranchLength()); } - - //add i's BL to total if it is from the groups the user wants - if ((t->tree[i].getBranchLength() != -1) && (copyIpcount.size() != 0)) { + + if (pcountSize == 0) { } + else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1)) { UniqueBL += abs(t->tree[i].getBranchLength()); } + + if ((t->tree[i].getBranchLength() != -1) && (pcountSize != 0)) { totalBL += abs(t->tree[i].getBranchLength()); } - } UW = (UniqueBL / totalBL); @@ -150,8 +143,6 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) { double UniqueBL; //a branch length is unique if it's chidren are from the same group double totalBL; //all branch lengths double UW; //Unweighted Value = UniqueBL / totalBL; - map::iterator it; //iterator to traverse pgroups - map copyIpcount; copyTree = new Tree; //if the users enters no groups then give them the score of all groups @@ -174,7 +165,6 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) { UniqueBL=0.0000; //a branch length is unique if it's chidren are from the same group totalBL = 0.00; //all branch lengths UW = 0.00; //Unweighted Value = UniqueBL / totalBL; - copyIpcount.clear(); //copy random tree passed in copyTree->getCopy(t); @@ -187,33 +177,29 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) { if (m->control_pressed) { delete copyTree; return data; } - //copyTree->createNewickFile("random"+groupA+toString(count)); - for(int i=0;igetNumNodes();i++){ + + if (m->control_pressed) { return data; } - if (m->control_pressed) { delete copyTree; return data; } + //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want + //pcountSize = 2, not unique to one group + //pcountSize = 1, unique to one group - /**********************************************************************/ - //This section adds in all lengths that are non leaf - copyIpcount = copyTree->tree[i].pcount; - for (it = copyIpcount.begin(); it != copyIpcount.end();) { - if (m->inUsersGroups(it->first, groups) != true) { - copyIpcount.erase(it++); - }else { it++; } + int pcountSize = 0; + for (int j = 0; j < groups.size(); j++) { + map::iterator itGroup = copyTree->tree[i].pcount.find(groups[j]); + if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } } - - //if i's children are from the same group then i's pcount size will be 1 - //if copyIpcount.size() = 0 they are from a branch that is entirely from a group the user doesn't want - if (copyIpcount.size() == 0) { } - else if ((copyTree->tree[i].getBranchLength() != -1) && (copyIpcount.size() == 1)) { UniqueBL += abs(copyTree->tree[i].getBranchLength()); } - - //add i's BL to total if it is from the groups the user wants - if ((copyTree->tree[i].getBranchLength() != -1) && (copyIpcount.size() != 0)) { + + if (pcountSize == 0) { } + else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1)) { UniqueBL += abs(copyTree->tree[i].getBranchLength()); } + + if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize != 0)) { totalBL += abs(copyTree->tree[i].getBranchLength()); } - } - + + UW = (UniqueBL / totalBL); if (isnan(UW) || isinf(UW)) { UW = 0; } @@ -242,7 +228,6 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) { UniqueBL=0.0000; //a branch length is unique if it's chidren are from the same group totalBL = 0.00; //all branch lengths UW = 0.00; //Unweighted Value = UniqueBL / totalBL; - copyIpcount.clear(); //copy random tree passed in copyTree->getCopy(t); @@ -253,25 +238,25 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) { if (m->control_pressed) { delete copyTree; return data; } for(int i=0;igetNumNodes();i++){ - if (m->control_pressed) { delete copyTree; return data; } - copyIpcount = copyTree->tree[i].pcount; - for (it = copyIpcount.begin(); it != copyIpcount.end();) { - if (m->inUsersGroups(it->first, groups) != true) { - copyIpcount.erase(it++); - }else { it++; } + if (m->control_pressed) { return data; } + + //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want + //pcountSize = 2, not unique to one group + //pcountSize = 1, unique to one group + + int pcountSize = 0; + for (int j = 0; j < groups.size(); j++) { + map::iterator itGroup = copyTree->tree[i].pcount.find(groups[j]); + if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } } - - //if i's children are from the same group then i's pcount size will be 1 - //if copyIpcount.size() = 0 they are from a branch that is entirely from a group the user doesn't want - if (copyIpcount.size() == 0) { } - else if ((copyTree->tree[i].getBranchLength() != -1) && (copyIpcount.size() == 1)) { abs(UniqueBL += copyTree->tree[i].getBranchLength()); } - - //add i's BL to total if it is from the groups the user wants - if ((copyTree->tree[i].getBranchLength() != -1) && (copyIpcount.size() != 0)) { + + if (pcountSize == 0) { } + else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1)) { UniqueBL += abs(copyTree->tree[i].getBranchLength()); } + + if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize != 0)) { totalBL += abs(copyTree->tree[i].getBranchLength()); } - } UW = (UniqueBL / totalBL); -- 2.39.2