From bd93b1a6f9fe9a6a4a7ac2e9f106e5c83a438856 Mon Sep 17 00:00:00 2001 From: westcott Date: Tue, 31 Aug 2010 17:55:38 +0000 Subject: [PATCH] working on parallelizing unifrac.unweighted. --- summarysharedcommand.cpp | 2 +- unifracunweightedcommand.cpp | 14 +-- unifracunweightedcommand.h | 2 +- unifracweightedcommand.cpp | 42 ++----- unweighted.cpp | 219 ++++++++++++++++++++++++----------- unweighted.h | 14 ++- weighted.cpp | 37 +++--- 7 files changed, 201 insertions(+), 129 deletions(-) diff --git a/summarysharedcommand.cpp b/summarysharedcommand.cpp index 1046c1b..3828a38 100644 --- a/summarysharedcommand.cpp +++ b/summarysharedcommand.cpp @@ -234,7 +234,7 @@ int SummarySharedCommand::execute(){ outputFileHandle << "label" <<'\t' << "comparison" << '\t'; for(int i=0;igetName(); - if (sumCalculators[i]->getCols() == 3) { outputFileHandle << "\tlci\thci"; } + if (sumCalculators[i]->getCols() == 3) { outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; } } outputFileHandle << endl; diff --git a/unifracunweightedcommand.cpp b/unifracunweightedcommand.cpp index 68c943c..7836eb6 100644 --- a/unifracunweightedcommand.cpp +++ b/unifracunweightedcommand.cpp @@ -21,7 +21,7 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option) { else { //valid paramters for this command - string Array[] = {"groups","iters","distance","random", "outputdir","inputdir"}; + string Array[] = {"groups","iters","distance","random", "processors","outputdir","inputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); @@ -61,6 +61,9 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option) { temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "f"; } random = m->isTrue(temp); + temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; } + convert(temp, processors); + if (!random) { iters = 0; } //turn off random calcs //if user selects distance = true and no groups it won't calc the pairwise @@ -156,14 +159,9 @@ int UnifracUnweightedCommand::execute() { utreeScores.resize(numComp); UWScoreSig.resize(numComp); - userData = unweighted->getValues(T[i]); //userData[0] = unweightedscore + userData = unweighted->getValues(T[i], processors, outputDir); //userData[0] = unweightedscore - if (m->control_pressed) { - if (random) { delete output; } - outSum.close(); - for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } - return 0; - } + if (m->control_pressed) { if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }return 0; } //output scores for each combination for(int k = 0; k < numComp; k++) { diff --git a/unifracunweightedcommand.h b/unifracunweightedcommand.h index f6f8678..3e6d835 100644 --- a/unifracunweightedcommand.h +++ b/unifracunweightedcommand.h @@ -36,7 +36,7 @@ class UnifracUnweightedCommand : public Command { Unweighted* unweighted; string sumFile, allGroups; vector groupComb; // AB. AC, BC... - int iters, numGroups, numComp, counter; + int iters, numGroups, numComp, counter, processors; EstOutput userData; //unweighted score info for user tree EstOutput randomData; //unweighted score info for random trees vector< vector > utreeScores; //scores for users trees for each comb. diff --git a/unifracweightedcommand.cpp b/unifracweightedcommand.cpp index 65b0a50..ad012b5 100644 --- a/unifracweightedcommand.cpp +++ b/unifracweightedcommand.cpp @@ -275,7 +275,7 @@ int UnifracWeightedCommand::execute() { int UnifracWeightedCommand::createProcesses(Tree* t, Tree* randT, vector< vector > namesOfGroupCombos, vector& sums, vector< vector >& scores) { try { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - int process = 0; + int process = 1; int num = 0; vector processIDS; @@ -295,52 +295,32 @@ int UnifracWeightedCommand::createProcesses(Tree* t, Tree* randT, vector< vector //pass numSeqs to parent ofstream out; - string tempFile = outputDir + toString(getpid()) + ".results.temp"; + string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp"; m->openOutputFile(tempFile, out); - out << results.size() << endl; - for (int i = lines[process]->start; i < (lines[process]->start + lines[process]->num); i++) { out << results[i] << '\t'; } out << endl; + for (int i = lines[process]->start; i < (lines[process]->start + lines[process]->num); i++) { out << scores[i][0] << '\t'; } out << endl; out.close(); exit(0); }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); } } + driver(t, randT, namesOfGroupCombos, lines[0]->start, lines[0]->num, sums, scores); + //force parent to wait until all the processes are done - for (int i=0;iopenInputFile(s, in); - vector r; - - //get quantiles - while (!in.eof()) { - int num; - in >> num; - - m->gobble(in); - - double w; - for (int j = 0; j < num; j++) { - in >> w; - r.push_back(w); - } - m->gobble(in); - } + for (int i = lines[process]->start; i < (lines[process]->start + lines[process]->num); i++) { in >> scores[i][0]; } in.close(); remove(s.c_str()); - - //save quan in quantiles - for (int j = 0; j < r.size(); j++) { - //put all values of r into results - results.push_back(r[j]); - } } m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine(); @@ -358,7 +338,7 @@ int UnifracWeightedCommand::createProcesses(Tree* t, Tree* randT, vector< vector int UnifracWeightedCommand::driver(Tree* t, Tree* randT, vector< vector > namesOfGroupCombos, int start, int num, vector& sums, vector< vector >& scores) { try { int count = 0; - int total = start+num; + int total = num; int twentyPercent = (total * 0.20); for (int h = start; h < (start+num); h++) { @@ -389,7 +369,7 @@ int UnifracWeightedCommand::driver(Tree* t, Tree* randT, vector< vector count++; //report progress - if((h) % twentyPercent == 0){ m->mothurOut("Random comparison percentage complete: " + toString(int((h / (float)total) * 100.0))); m->mothurOutEndLine(); } + if((count) % twentyPercent == 0){ m->mothurOut("Random comparison percentage complete: " + toString(int((count / (float)total) * 100.0))); m->mothurOutEndLine(); } } m->mothurOut("Random comparison percentage complete: 100"); m->mothurOutEndLine(); diff --git a/unweighted.cpp b/unweighted.cpp index c4548c9..ba88049 100644 --- a/unweighted.cpp +++ b/unweighted.cpp @@ -11,73 +11,28 @@ /**************************************************************************************************/ -EstOutput Unweighted::getValues(Tree* t) { +EstOutput Unweighted::getValues(Tree* t, int p, string o) { try { globaldata = GlobalData::getInstance(); - - vector groups; - double UniqueBL; //a branch length is unique if it's chidren are from the same group - double totalBL; //all branch lengths - double UW; //Unweighted Value = UniqueBL / totalBL; - + processors = p; + outputDir = o; + //if the users enters no groups then give them the score of all groups int numGroups = globaldata->Groups.size(); //calculate number of comparsions int numComp = 0; + vector< vector > namesOfGroupCombos; for (int r=0; r groups; groups.push_back(globaldata->Groups[r]); groups.push_back(globaldata->Groups[l]); + namesOfGroupCombos.push_back(groups); } } - - //numComp+1 for AB, AC, BC, ABC - data.resize(numComp+1,0); - - int count = 0; - for (int a=0; aGroups[a]); groups.push_back(globaldata->Groups[l]); - - for(int i=0;igetNumNodes();i++){ - if (m->control_pressed) { return data; } - - //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want - //pcountSize = 2, not unique to one group - //pcountSize = 1, unique to one group - - int pcountSize = 0; - for (int j = 0; j < groups.size(); j++) { - map::iterator itGroup = t->tree[i].pcount.find(groups[j]); - if (itGroup != t->tree[i].pcount.end()) { pcountSize++; } - } - - if (pcountSize == 0) { } - else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1)) { UniqueBL += abs(t->tree[i].getBranchLength()); } - - if ((t->tree[i].getBranchLength() != -1) && (pcountSize != 0)) { - totalBL += abs(t->tree[i].getBranchLength()); - } - } - - UW = (UniqueBL / totalBL); - - if (isnan(UW) || isinf(UW)) { UW = 0; } - - data[count] = UW; - count++; - groups.clear(); - } - } - if (numComp != 1) { + vector groups; if (numGroups == 0) { //get score for all users groups for (int i = 0; i < tmap->namesOfGroups.size(); i++) { @@ -85,15 +40,145 @@ EstOutput Unweighted::getValues(Tree* t) { groups.push_back(tmap->namesOfGroups[i]); } } + namesOfGroupCombos.push_back(groups); }else { for (int i = 0; i < globaldata->Groups.size(); i++) { groups.push_back(globaldata->Groups[i]); } + namesOfGroupCombos.push_back(groups); + } + } + + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + if(processors == 1){ + data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size()); + }else{ + int numPairs = namesOfGroupCombos.size(); + + int numPairsPerProcessor = numPairs / processors; + + for (int i = 0; i < processors; i++) { + int startPos = i * numPairsPerProcessor; + if(i == processors - 1){ + numPairsPerProcessor = numPairs - i * numPairsPerProcessor; + } + lines.push_back(new linePair(startPos, numPairsPerProcessor)); + } + + data = createProcesses(t, namesOfGroupCombos); + + for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); } + #else + data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size()); + #endif - UniqueBL=0.0000; //a branch length is unique if it's chidren are from the same group - totalBL = 0.00; //all branch lengths - UW = 0.00; //Unweighted Value = UniqueBL / totalBL; + return data; + } + catch(exception& e) { + m->errorOut(e, "Unweighted", "getValues"); + exit(1); + } +} +/**************************************************************************************************/ + +EstOutput Unweighted::createProcesses(Tree* t, vector< vector > namesOfGroupCombos) { + try { +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + int process = 1; + int num = 0; + vector processIDS; + + EstOutput results; + + //loop through and create all the processes you want + while (process != processors) { + int pid = fork(); + + if (pid > 0) { + processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later + process++; + }else if (pid == 0){ + EstOutput myresults; + myresults = driver(t, namesOfGroupCombos, lines[process]->start, lines[process]->num); + + if (m->control_pressed) { exit(0); } + + m->mothurOut("Merging results."); m->mothurOutEndLine(); + + //pass numSeqs to parent + ofstream out; + string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp"; + m->openOutputFile(tempFile, out); + out << myresults.size() << endl; + for (int i = 0; i < myresults.size(); i++) { out << myresults[i] << '\t'; } out << endl; + out.close(); + + exit(0); + }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); } + } + + results = driver(t, namesOfGroupCombos, lines[0]->start, lines[0]->num); + + //force parent to wait until all the processes are done + for (int i=0;i<(processors-1);i++) { + int temp = processIDS[i]; + wait(&temp); + } + + if (m->control_pressed) { return results; } + + //get data created by processes + for (int i=0;i<(processors-1);i++) { + ifstream in; + string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp"; + m->openInputFile(s, in); + + //get quantiles + if (!in.eof()) { + int num; + in >> num; m->gobble(in); + + if (m->control_pressed) { break; } + + double w; + for (int j = 0; j < num; j++) { + in >> w; + results.push_back(w); + } + m->gobble(in); + } + in.close(); + remove(s.c_str()); + } + + m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine(); + + return results; +#endif + } + catch(exception& e) { + m->errorOut(e, "Unweighted", "createProcesses"); + exit(1); + } +} +/**************************************************************************************************/ +EstOutput Unweighted::driver(Tree* t, vector< vector > namesOfGroupCombos, int start, int num) { + try { + + EstOutput results; results.resize(num); + + int count = 0; + int total = num; + int twentyPercent = (total * 0.20); + + for (int h = start; h < (start+num); h++) { + + if (m->control_pressed) { return results; } + + double UniqueBL=0.0000; //a branch length is unique if it's chidren are from the same group + double totalBL = 0.00; //all branch lengths + double UW = 0.00; //Unweighted Value = UniqueBL / totalBL; for(int i=0;igetNumNodes();i++){ @@ -104,8 +189,8 @@ EstOutput Unweighted::getValues(Tree* t) { //pcountSize = 1, unique to one group int pcountSize = 0; - for (int j = 0; j < groups.size(); j++) { - map::iterator itGroup = t->tree[i].pcount.find(groups[j]); + for (int j = 0; j < namesOfGroupCombos[h].size(); j++) { + map::iterator itGroup = t->tree[i].pcount.find(namesOfGroupCombos[h][j]); if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } } @@ -121,18 +206,22 @@ EstOutput Unweighted::getValues(Tree* t) { if (isnan(UW) || isinf(UW)) { UW = 0; } - data[count] = UW; - } + results[count] = UW; + count++; - return data; - + //report progress + if((count) % twentyPercent == 0){ m->mothurOut("Percentage complete: " + toString(int((count / (float)total) * 100.0))); m->mothurOutEndLine(); } + } + + m->mothurOut("Percentage complete: 100"); m->mothurOutEndLine(); + + return results; } catch(exception& e) { - m->errorOut(e, "Unweighted", "getValues"); + m->errorOut(e, "Unweighted", "driver"); exit(1); } } - /**************************************************************************************************/ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) { @@ -277,6 +366,6 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) { } } - +/**************************************************************************************************/ diff --git a/unweighted.h b/unweighted.h index 007fef7..f27df8c 100644 --- a/unweighted.h +++ b/unweighted.h @@ -21,14 +21,26 @@ class Unweighted : public TreeCalculator { public: Unweighted(TreeMap* t) : tmap(t) {}; ~Unweighted() {}; - EstOutput getValues(Tree*); + EstOutput getValues(Tree*, int, string); EstOutput getValues(Tree*, string, string); private: + struct linePair { + int start; + int num; + linePair(int i, int j) : start(i), num(j) {} + }; + vector lines; + GlobalData* globaldata; Tree* copyTree; EstOutput data; TreeMap* tmap; + int processors; + string outputDir; + + EstOutput driver(Tree*, vector< vector >, int, int); + EstOutput createProcesses(Tree*, vector< vector >); }; diff --git a/weighted.cpp b/weighted.cpp index d068369..c9ad812 100644 --- a/weighted.cpp +++ b/weighted.cpp @@ -74,7 +74,7 @@ EstOutput Weighted::getValues(Tree* t, int p, string o) { EstOutput Weighted::createProcesses(Tree* t, vector< vector > namesOfGroupCombos, vector& sums) { try { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - int process = 0; + int process = 1; int num = 0; vector processIDS; @@ -88,7 +88,8 @@ EstOutput Weighted::createProcesses(Tree* t, vector< vector > namesOfGro processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - results = driver(t, namesOfGroupCombos, lines[process]->start, lines[process]->num, sums); + EstOutput Myresults; + Myresults = driver(t, namesOfGroupCombos, lines[process]->start, lines[process]->num, sums); if (m->control_pressed) { exit(0); } @@ -96,18 +97,20 @@ EstOutput Weighted::createProcesses(Tree* t, vector< vector > namesOfGro //pass numSeqs to parent ofstream out; - string tempFile = outputDir + toString(getpid()) + ".results.temp"; + string tempFile = outputDir + toString(getpid()) + ".weighted.results.temp"; m->openOutputFile(tempFile, out); - out << results.size() << endl; - for (int i = 0; i < results.size(); i++) { out << results[i] << '\t'; } out << endl; + out << Myresults.size() << endl; + for (int i = 0; i < Myresults.size(); i++) { out << Myresults[i] << '\t'; } out << endl; out.close(); exit(0); }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); } } + + results = driver(t, namesOfGroupCombos, lines[0]->start, lines[0]->num, sums); //force parent to wait until all the processes are done - for (int i=0;i > namesOfGro if (m->control_pressed) { return results; } //get data created by processes - for (int i=0;iopenInputFile(s, in); - vector r; - //get quantiles while (!in.eof()) { int num; - in >> num; + in >> num; m->gobble(in); if (m->control_pressed) { break; } - - m->gobble(in); double w; for (int j = 0; j < num; j++) { in >> w; - r.push_back(w); + results.push_back(w); } m->gobble(in); } in.close(); remove(s.c_str()); - - //save quan in quantiles - for (int j = 0; j < r.size(); j++) { - //put all values of r into results - results.push_back(r[j]); - } } m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine(); @@ -167,7 +160,7 @@ EstOutput Weighted::driver(Tree* t, vector< vector > namesOfGroupCombos, vector D; int count = 0; - int total = start+num; + int total = num; int twentyPercent = (total * 0.20); for (int h = start; h < (start+num); h++) { @@ -203,7 +196,7 @@ EstOutput Weighted::driver(Tree* t, vector< vector > namesOfGroupCombos, count++; //report progress - if((h) % twentyPercent == 0){ m->mothurOut("Percentage complete: " + toString(int((h / (float)total) * 100.0))); m->mothurOutEndLine(); } + if((count) % twentyPercent == 0){ m->mothurOut("Percentage complete: " + toString(int((count / (float)total) * 100.0))); m->mothurOutEndLine(); } } m->mothurOut("Percentage complete: 100"); m->mothurOutEndLine(); -- 2.39.2