]> git.donarmstrong.com Git - mothur.git/commitdiff
paralellized metastats for windows
authorSarah Westcott <mothur.westcott@gmail.com>
Wed, 29 Feb 2012 11:12:22 +0000 (06:12 -0500)
committerSarah Westcott <mothur.westcott@gmail.com>
Wed, 29 Feb 2012 11:12:22 +0000 (06:12 -0500)
matrixoutputcommand.h
metastatscommand.cpp
metastatscommand.h

index 748d8c23e426d69a295a6487454404c5a0598177..762b0e10a02b0428e6a3e8119c1e0a2ee0887d84 100644 (file)
@@ -110,7 +110,6 @@ private:
        
 /**************************************************************************************************/
 //custom data structure for threads to use.
-//main process handling the calcs that can do more than 2 groups
 // This is passed by void pointer so it can be any data type
 // that can be passed using a single void pointer (LPVOID).
 struct distSharedData {
index b7aa7e5fcb3bcc809c89ac3cc82c1ecae9d6272a..e23224bf20e9977e842bc78e765cd95e018a1666 100644 (file)
@@ -10,7 +10,7 @@
 #include "metastatscommand.h"
 #include "metastats.h"
 #include "sharedutilities.h"
-#include "mothurmetastats.h"
+
 
 //**********************************************************************************************************************
 vector<string> MetaStatsCommand::setParameters(){      
@@ -233,7 +233,7 @@ int MetaStatsCommand::execute(){
                if (numGroups == 2) { processors = 1; }
                else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
                
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+//             #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                        if(processors != 1){
                                int numPairs = namesOfGroupCombos.size();
                                int numPairsPerProcessor = numPairs / processors;
@@ -246,7 +246,7 @@ int MetaStatsCommand::execute(){
                                        lines.push_back(linePair(startPos, numPairsPerProcessor));
                                }
                        }
-               #endif
+//             #endif
                
                //as long as you are not at the end of the file or done wih the lines you want
                while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
@@ -339,13 +339,13 @@ int MetaStatsCommand::execute(){
 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
        try {
                
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               
                                if(processors == 1){
                                        driver(0, namesOfGroupCombos.size(), thisLookUp);
                                }else{
                                        int process = 1;
                                        vector<int> processIDS;
-               
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                                        //loop through and create all the processes you want
                                        while (process != processors) {
                                                int pid = fork();
@@ -371,11 +371,67 @@ int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
                                                int temp = processIDS[i];
                                                wait(&temp);
                                        }
-                               }
-               #else
-                               driver(0, namesOfGroupCombos.size(), thisLookUp);
-               #endif
+        #else
+                    
+                    //////////////////////////////////////////////////////////////////////////////////////////////////////
+                    //Windows version shared memory, so be careful when passing variables through the summarySharedData struct. 
+                    //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+                    //Taking advantage of shared memory to pass results vectors.
+                    //////////////////////////////////////////////////////////////////////////////////////////////////////
+                    
+                    vector<metastatsData*> pDataArray; 
+                    DWORD   dwThreadIdArray[processors-1];
+                    HANDLE  hThreadArray[processors-1]; 
+                    
+                    //Create processor worker threads.
+                    for( int i=1; i<processors; i++ ){
+                        
+                        //make copy of lookup so we don't get access violations
+                        vector<SharedRAbundVector*> newLookup;
+                        vector<string> designMapGroups;
+                        for (int k = 0; k < thisLookUp.size(); k++) {
+                            SharedRAbundVector* temp = new SharedRAbundVector();
+                            temp->setLabel(thisLookUp[k]->getLabel());
+                            temp->setGroup(thisLookUp[k]->getGroup());
+                            newLookup.push_back(temp);
+                            designMapGroups.push_back(designMap->getGroup(thisLookUp[k]->getGroup()));
+                        }
+                        
+                        //for each bin
+                        for (int k = 0; k < thisLookUp[0]->getNumBins(); k++) {
+                            if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
+                            for (int j = 0; j < thisLookUp.size(); j++) { newLookup[j]->push_back(thisLookUp[j]->getAbundance(k), thisLookUp[j]->getGroup()); }
+                        }
+                        
+                        // Allocate memory for thread data.
+                        metastatsData* tempSum = new metastatsData(sharedfile, outputDir, m, lines[i].start, lines[i].num, namesOfGroupCombos, newLookup, designMapGroups, iters, threshold);
+                        pDataArray.push_back(tempSum);
+                        processIDS.push_back(i);
+                        
+                        hThreadArray[i-1] = CreateThread(NULL, 0, MyMetastatsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
+                    }
+                    
+                    //do my part
+                                       driver(lines[0].start, lines[0].num, thisLookUp);
+                    
+                    //Wait until all threads have terminated.
+                    WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+                    
+                    //Close all thread handles and free memory allocations.
+                    for(int i=0; i < pDataArray.size(); i++){
+                        for (int j = 0; j < pDataArray[i]->thisLookUp.size(); j++) {  delete pDataArray[i]->thisLookUp[j];  } 
+                        for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) {  
+                            outputNames.push_back(pDataArray[i]->outputNames[j]);
+                            outputTypes["metastats"].push_back(pDataArray[i]->outputNames[j]);
+                        }
+                                                
+                        CloseHandle(hThreadArray[i]);
+                        delete pDataArray[i];
+                    }
+        #endif
 
+                               }
+               
                return 0;
                
        }
@@ -394,7 +450,7 @@ int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& th
                        //get set names
                        string setA = namesOfGroupCombos[c][0]; 
                        string setB = namesOfGroupCombos[c][1];
-               //cout << setA << '\t' << setB << endl;
+               
                        //get filename
                        string outputFileName = outputDir +  m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
                        outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
@@ -424,10 +480,7 @@ int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& th
                                        setACount++;
                                }
                        }
-                       
-                       //for (int i = 0; i < subset.size(); i++) { cout << designMap->getGroup(subset[i]->getGroup()) << endl; }
-                       //cout << setACount << endl;
-                       
+                                               
                        if ((setACount == 0) || (setBCount == 0))  { 
                                m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine(); 
                                outputNames.pop_back();
index 88e32acc0426a15525b1278bd5fc6d9c4d6392fc..2c5c80cd863760f56d7f7653d49a713559c84821 100644 (file)
@@ -13,6 +13,7 @@
 #include "command.hpp"
 #include "inputdata.h"
 #include "sharedrabundvector.h"
+#include "mothurmetastats.h"
 
 class MetaStatsCommand : public Command {
 
@@ -55,5 +56,106 @@ private:
        int driver(int, int, vector<SharedRAbundVector*>&);
 };
 
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+struct metastatsData {
+    vector<SharedRAbundVector*> thisLookUp;
+    vector< vector<string> > namesOfGroupCombos;
+    vector<string> designMapGroups;
+    vector<string> outputNames;
+       int start;
+       int num, iters;
+       float threshold;
+       MothurOut* m;
+       string sharedfile;
+    string outputDir;
+       
+       metastatsData(){}
+       metastatsData(string sf, string oDir, MothurOut* mout, int st, int en, vector< vector<string> > ns, vector<SharedRAbundVector*> lu, vector<string> dg, int i, float thr) {
+               sharedfile = sf;
+        outputDir = oDir;
+               m = mout;
+               start = st;
+               num = en;
+        namesOfGroupCombos = ns;
+        thisLookUp = lu;
+        designMapGroups = dg;
+        iters = i;
+        threshold = thr;
+       }
+};
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#else
+static DWORD WINAPI MyMetastatsThreadFunction(LPVOID lpParam){ 
+       metastatsData* pDataArray;
+       pDataArray = (metastatsData*)lpParam;
+       
+       try {
+               
+        //for each combo
+               for (int c = pDataArray->start; c < (pDataArray->start+pDataArray->num); c++) {
+                       
+                       //get set names
+                       string setA = pDataArray->namesOfGroupCombos[c][0]; 
+                       string setB = pDataArray->namesOfGroupCombos[c][1];
+            
+                       //get filename
+                       string outputFileName = pDataArray->outputDir +  pDataArray->m->getRootName(pDataArray->m->getSimpleName(pDataArray->sharedfile)) + pDataArray->thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
+                       pDataArray->outputNames.push_back(outputFileName); 
+                       
+                       vector< vector<double> > data2; data2.resize(pDataArray->thisLookUp[0]->getNumBins());
+                       
+                       vector<SharedRAbundVector*> subset;
+                       int setACount = 0;
+                       int setBCount = 0;
+                       for (int i = 0; i < pDataArray->thisLookUp.size(); i++) {
+                               //is this group for a set we want to compare??
+                               //sorting the sets by putting setB at the back and setA in the front
+                               if (pDataArray->designMapGroups[i] == setB) {  
+                                       subset.push_back(pDataArray->thisLookUp[i]);
+                                       setBCount++;
+                               }else if (pDataArray->designMapGroups[i] == setA) {
+                                       subset.insert(subset.begin()+setACount, pDataArray->thisLookUp[i]);
+                                       setACount++;
+                               }
+                       }
+            
+                       if ((setACount == 0) || (setBCount == 0))  { 
+                               pDataArray->m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); pDataArray->m->mothurOutEndLine(); 
+                               pDataArray->outputNames.pop_back();
+                       }else {
+                               //fill data
+                               for (int j = 0; j < pDataArray->thisLookUp[0]->getNumBins(); j++) {
+                                       data2[j].resize(subset.size(), 0.0);
+                                       for (int i = 0; i < subset.size(); i++) {
+                                               data2[j][i] = (subset[i]->getAbundance(j));
+                                       }
+                               }
+                               
+                               pDataArray->m->mothurOut("Comparing " + setA + " and " + setB + "..."); pDataArray->m->mothurOutEndLine(); 
+                               
+                               pDataArray->m->mothurOutEndLine();
+                               MothurMetastats mothurMeta(pDataArray->threshold, pDataArray->iters);
+                               mothurMeta.runMetastats(outputFileName, data2, setACount);
+                               pDataArray->m->mothurOutEndLine();
+                               pDataArray->m->mothurOutEndLine(); 
+                       }
+        }
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "MetaStatsCommand", "MyMetastatsThreadFunction");
+               exit(1);
+       }
+} 
+#endif
+
+
+
 #endif