From: Sarah Westcott Date: Wed, 29 Feb 2012 11:12:22 +0000 (-0500) Subject: paralellized metastats for windows X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=6b2ac07f3e9ee57594d2358a3a25f9f700bd7362 paralellized metastats for windows --- diff --git a/matrixoutputcommand.h b/matrixoutputcommand.h index 748d8c2..762b0e1 100644 --- a/matrixoutputcommand.h +++ b/matrixoutputcommand.h @@ -110,7 +110,6 @@ private: /**************************************************************************************************/ //custom data structure for threads to use. -//main process handling the calcs that can do more than 2 groups // This is passed by void pointer so it can be any data type // that can be passed using a single void pointer (LPVOID). struct distSharedData { diff --git a/metastatscommand.cpp b/metastatscommand.cpp index b7aa7e5..e23224b 100644 --- a/metastatscommand.cpp +++ b/metastatscommand.cpp @@ -10,7 +10,7 @@ #include "metastatscommand.h" #include "metastats.h" #include "sharedutilities.h" -#include "mothurmetastats.h" + //********************************************************************************************************************** vector MetaStatsCommand::setParameters(){ @@ -233,7 +233,7 @@ int MetaStatsCommand::execute(){ if (numGroups == 2) { processors = 1; } else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; } - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +// #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) if(processors != 1){ int numPairs = namesOfGroupCombos.size(); int numPairsPerProcessor = numPairs / processors; @@ -246,7 +246,7 @@ int MetaStatsCommand::execute(){ lines.push_back(linePair(startPos, numPairsPerProcessor)); } } - #endif +// #endif //as long as you are not at the end of the file or done wih the lines you want while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { @@ -339,13 +339,13 @@ int MetaStatsCommand::execute(){ int MetaStatsCommand::process(vector& thisLookUp){ try { - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + if(processors == 1){ driver(0, namesOfGroupCombos.size(), thisLookUp); }else{ int process = 1; vector processIDS; - + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) //loop through and create all the processes you want while (process != processors) { int pid = fork(); @@ -371,11 +371,67 @@ int MetaStatsCommand::process(vector& thisLookUp){ int temp = processIDS[i]; wait(&temp); } - } - #else - driver(0, namesOfGroupCombos.size(), thisLookUp); - #endif + #else + + ////////////////////////////////////////////////////////////////////////////////////////////////////// + //Windows version shared memory, so be careful when passing variables through the summarySharedData struct. + //Above fork() will clone, so memory is separate, but that's not the case with windows, + //Taking advantage of shared memory to pass results vectors. + ////////////////////////////////////////////////////////////////////////////////////////////////////// + + vector pDataArray; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; + + //Create processor worker threads. + for( int i=1; i newLookup; + vector designMapGroups; + for (int k = 0; k < thisLookUp.size(); k++) { + SharedRAbundVector* temp = new SharedRAbundVector(); + temp->setLabel(thisLookUp[k]->getLabel()); + temp->setGroup(thisLookUp[k]->getGroup()); + newLookup.push_back(temp); + designMapGroups.push_back(designMap->getGroup(thisLookUp[k]->getGroup())); + } + + //for each bin + for (int k = 0; k < thisLookUp[0]->getNumBins(); k++) { + if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } + for (int j = 0; j < thisLookUp.size(); j++) { newLookup[j]->push_back(thisLookUp[j]->getAbundance(k), thisLookUp[j]->getGroup()); } + } + + // Allocate memory for thread data. + metastatsData* tempSum = new metastatsData(sharedfile, outputDir, m, lines[i].start, lines[i].num, namesOfGroupCombos, newLookup, designMapGroups, iters, threshold); + pDataArray.push_back(tempSum); + processIDS.push_back(i); + + hThreadArray[i-1] = CreateThread(NULL, 0, MyMetastatsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]); + } + + //do my part + driver(lines[0].start, lines[0].num, thisLookUp); + + //Wait until all threads have terminated. + WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); + + //Close all thread handles and free memory allocations. + for(int i=0; i < pDataArray.size(); i++){ + for (int j = 0; j < pDataArray[i]->thisLookUp.size(); j++) { delete pDataArray[i]->thisLookUp[j]; } + for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) { + outputNames.push_back(pDataArray[i]->outputNames[j]); + outputTypes["metastats"].push_back(pDataArray[i]->outputNames[j]); + } + + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } + #endif + } + return 0; } @@ -394,7 +450,7 @@ int MetaStatsCommand::driver(int start, int num, vector& th //get set names string setA = namesOfGroupCombos[c][0]; string setB = namesOfGroupCombos[c][1]; - //cout << setA << '\t' << setB << endl; + //get filename string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats"; outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName); @@ -424,10 +480,7 @@ int MetaStatsCommand::driver(int start, int num, vector& th setACount++; } } - - //for (int i = 0; i < subset.size(); i++) { cout << designMap->getGroup(subset[i]->getGroup()) << endl; } - //cout << setACount << endl; - + if ((setACount == 0) || (setBCount == 0)) { m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine(); outputNames.pop_back(); diff --git a/metastatscommand.h b/metastatscommand.h index 88e32ac..2c5c80c 100644 --- a/metastatscommand.h +++ b/metastatscommand.h @@ -13,6 +13,7 @@ #include "command.hpp" #include "inputdata.h" #include "sharedrabundvector.h" +#include "mothurmetastats.h" class MetaStatsCommand : public Command { @@ -55,5 +56,106 @@ private: int driver(int, int, vector&); }; +/**************************************************************************************************/ +//custom data structure for threads to use. +// This is passed by void pointer so it can be any data type +// that can be passed using a single void pointer (LPVOID). +struct metastatsData { + vector thisLookUp; + vector< vector > namesOfGroupCombos; + vector designMapGroups; + vector outputNames; + int start; + int num, iters; + float threshold; + MothurOut* m; + string sharedfile; + string outputDir; + + metastatsData(){} + metastatsData(string sf, string oDir, MothurOut* mout, int st, int en, vector< vector > ns, vector lu, vector dg, int i, float thr) { + sharedfile = sf; + outputDir = oDir; + m = mout; + start = st; + num = en; + namesOfGroupCombos = ns; + thisLookUp = lu; + designMapGroups = dg; + iters = i; + threshold = thr; + } +}; +/**************************************************************************************************/ +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#else +static DWORD WINAPI MyMetastatsThreadFunction(LPVOID lpParam){ + metastatsData* pDataArray; + pDataArray = (metastatsData*)lpParam; + + try { + + //for each combo + for (int c = pDataArray->start; c < (pDataArray->start+pDataArray->num); c++) { + + //get set names + string setA = pDataArray->namesOfGroupCombos[c][0]; + string setB = pDataArray->namesOfGroupCombos[c][1]; + + //get filename + string outputFileName = pDataArray->outputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(pDataArray->sharedfile)) + pDataArray->thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats"; + pDataArray->outputNames.push_back(outputFileName); + + vector< vector > data2; data2.resize(pDataArray->thisLookUp[0]->getNumBins()); + + vector subset; + int setACount = 0; + int setBCount = 0; + for (int i = 0; i < pDataArray->thisLookUp.size(); i++) { + //is this group for a set we want to compare?? + //sorting the sets by putting setB at the back and setA in the front + if (pDataArray->designMapGroups[i] == setB) { + subset.push_back(pDataArray->thisLookUp[i]); + setBCount++; + }else if (pDataArray->designMapGroups[i] == setA) { + subset.insert(subset.begin()+setACount, pDataArray->thisLookUp[i]); + setACount++; + } + } + + if ((setACount == 0) || (setBCount == 0)) { + pDataArray->m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); pDataArray->m->mothurOutEndLine(); + pDataArray->outputNames.pop_back(); + }else { + //fill data + for (int j = 0; j < pDataArray->thisLookUp[0]->getNumBins(); j++) { + data2[j].resize(subset.size(), 0.0); + for (int i = 0; i < subset.size(); i++) { + data2[j][i] = (subset[i]->getAbundance(j)); + } + } + + pDataArray->m->mothurOut("Comparing " + setA + " and " + setB + "..."); pDataArray->m->mothurOutEndLine(); + + pDataArray->m->mothurOutEndLine(); + MothurMetastats mothurMeta(pDataArray->threshold, pDataArray->iters); + mothurMeta.runMetastats(outputFileName, data2, setACount); + pDataArray->m->mothurOutEndLine(); + pDataArray->m->mothurOutEndLine(); + } + } + + return 0; + + } + catch(exception& e) { + pDataArray->m->errorOut(e, "MetaStatsCommand", "MyMetastatsThreadFunction"); + exit(1); + } +} +#endif + + + #endif