]> git.donarmstrong.com Git - mothur.git/commitdiff
added paralellization for windows to dist.seqs and summary.seqs
authorwestcott <westcott>
Wed, 3 Aug 2011 17:01:11 +0000 (17:01 +0000)
committerwestcott <westcott>
Wed, 3 Aug 2011 17:01:11 +0000 (17:01 +0000)
classify.cpp
distancecommand.cpp
distancecommand.h
metastatscommand.cpp
mothur.h
seqsummarycommand.cpp
seqsummarycommand.h

index 1642f0bb037f609971797812575aa109e35aed28..4c6c6d8c8f0d628653ff772a367b07cdbadecced 100644 (file)
@@ -123,7 +123,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                                //create database
                                if(method == "kmer")                    {       database = new KmerDB(tempFile, kmerSize);                      }
                                else if(method == "suffix")             {       database = new SuffixDB(numSeqs);                                                               }
-                               else if(method == "blast")              {       database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch);   }
+                               else if(method == "blast")              {       database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, "");       }
                                else if(method == "distance")   {       database = new DistanceDB();    }
                                else {
                                        m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); m->mothurOutEndLine();
index b69b93d85c44c1fa89d08d49c0fb478b2f0cb242..f7dff2e3cb3fb5e81cc8d17af0bcd8d303a5ab6a 100644 (file)
@@ -8,11 +8,6 @@
  */
 
 #include "distancecommand.h"
-#include "ignoregaps.h"
-#include "eachgapdist.h"
-#include "eachgapignore.h"
-#include "onegapdist.h"
-#include "onegapignore.h"
 
 //**********************************************************************************************************************
 vector<string> DistanceCommand::setParameters(){       
@@ -200,26 +195,6 @@ DistanceCommand::DistanceCommand(string option) {
                        if ((column != "") && (oldfastafile != "") && (output != "column")) { m->mothurOut("You have provided column and oldfasta, indicating you want to append distances to your column file. Your output must be in column format to do so."); m->mothurOutEndLine(); abort=true; }
                        
                        if ((output != "column") && (output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column."); m->mothurOutEndLine(); output = "column"; }
-                       
-                       ValidCalculators validCalculator;
-                       
-                       if (m->isTrue(countends) == true) {
-                               for (int i=0; i<Estimators.size(); i++) {
-                                       if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
-                                               if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
-                                               else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
-                                               else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
-                                       }
-                               }
-                       }else {
-                               for (int i=0; i<Estimators.size(); i++) {
-                                       if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
-                                               if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
-                                               else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
-                                               else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
-                                       }
-                               }
-                       }
 
                }
                                
@@ -405,7 +380,7 @@ int DistanceCommand::execute(){
                MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
 #else          
                                
-       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+       //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                //if you don't need to fork anything
                if(processors == 1){
                        if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
@@ -442,14 +417,14 @@ int DistanceCommand::execute(){
                        
                        createProcesses(outputFile); 
                }
-       #else
+       //#else
                //ifstream inFASTA;
-               if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
-               else { driver(0, numSeqs, outputFile, "square");  }
-       #endif
+               //if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
+               //else { driver(0, numSeqs, outputFile, "square");  }
+       //#endif
        
 #endif
-               if (m->control_pressed) { outputTypes.clear();  delete distCalculator; m->mothurRemove(outputFile); return 0; }
+               if (m->control_pressed) { outputTypes.clear();  m->mothurRemove(outputFile); return 0; }
                
                #ifdef USE_MPI
                        MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
@@ -492,9 +467,7 @@ int DistanceCommand::execute(){
                        }
                #endif
                
-               if (m->control_pressed) { outputTypes.clear();  delete distCalculator; m->mothurRemove(outputFile); return 0; }
-               
-               delete distCalculator;
+               if (m->control_pressed) { outputTypes.clear();  m->mothurRemove(outputFile); return 0; }
                
                //set phylip file as new current phylipfile
                string current = "";
@@ -567,25 +540,80 @@ void DistanceCommand::createProcesses(string filename) {
                        int temp = processIDS[i];
                        wait(&temp);
                }
+#else
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //Windows version shared memory, so be careful when passing variables through the distanceData struct. 
+               //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+               //that's why the distance calculator was moved inside of the driver to make separate copies.
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               
+               vector<distanceData*> pDataArray; //[processors-1];
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1]; 
+               
+               //Create processor-1 worker threads.
+               for( int i=0; i<processors-1; i++ ){
+                       
+                       // Allocate memory for thread data.
+                       distanceData* tempDist = new distanceData(lines[i+1].start, lines[i+1].end, (filename + toString(i) + ".temp"), cutoff, alignDB, Estimators, m, output, numNewFasta, countends);
+                       pDataArray.push_back(tempDist);
+                       processIDS.push_back(i);
+                       
+                       //MyDistThreadFunction is in header. It must be global or static to work with the threads.
+                       //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+                       hThreadArray[i] = CreateThread(NULL, 0, MyDistThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
+               }
+               
+               //do your part
+               if (output != "square") {  driver(lines[0].start, lines[0].end, filename, cutoff); }
+               else { driver(lines[0].start, lines[0].end, filename, "square"); }
+               
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
+#endif
                
                //append and remove temp files
                for (int i=0;i<processIDS.size();i++) { 
                        m->appendFiles((filename + toString(processIDS[i]) + ".temp"), filename);
                        m->mothurRemove((filename + toString(processIDS[i]) + ".temp"));
                }
-#endif
+               
        }
        catch(exception& e) {
                m->errorOut(e, "DistanceCommand", "createProcesses");
                exit(1);
        }
 }
-
 /**************************************************************************************************/
 /////// need to fix to work with calcs and sequencedb
 int DistanceCommand::driver(int startLine, int endLine, string dFileName, float cutoff){
        try {
-
+               ValidCalculators validCalculator;
+               Dist* distCalculator;
+               if (m->isTrue(countends) == true) {
+                       for (int i=0; i<Estimators.size(); i++) {
+                               if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
+                                       if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
+                                       else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
+                                       else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
+                               }
+                       }
+               }else {
+                       for (int i=0; i<Estimators.size(); i++) {
+                               if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
+                                       if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
+                                       else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
+                                       else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
+                               }
+                       }
+               }
+               
                int startTime = time(NULL);
                
                //column file
@@ -605,7 +633,7 @@ int DistanceCommand::driver(int startLine, int endLine, string dFileName, float
                        }
                        for(int j=0;j<i;j++){
                                
-                               if (m->control_pressed) { outFile.close(); return 0;  }
+                               if (m->control_pressed) { delete distCalculator; outFile.close(); return 0;  }
                                
                                //if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file
                                //the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop
@@ -630,6 +658,7 @@ int DistanceCommand::driver(int startLine, int endLine, string dFileName, float
                m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
                
                outFile.close();
+               delete distCalculator;
                
                return 1;
        }
@@ -642,7 +671,26 @@ int DistanceCommand::driver(int startLine, int endLine, string dFileName, float
 /////// need to fix to work with calcs and sequencedb
 int DistanceCommand::driver(int startLine, int endLine, string dFileName, string square){
        try {
-
+               ValidCalculators validCalculator;
+               Dist* distCalculator;
+               if (m->isTrue(countends) == true) {
+                       for (int i=0; i<Estimators.size(); i++) {
+                               if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
+                                       if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
+                                       else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
+                                       else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
+                               }
+                       }
+               }else {
+                       for (int i=0; i<Estimators.size(); i++) {
+                               if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
+                                       if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
+                                       else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
+                                       else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
+                               }
+                       }
+               }
+               
                int startTime = time(NULL);
                
                //column file
@@ -662,7 +710,7 @@ int DistanceCommand::driver(int startLine, int endLine, string dFileName, string
                        
                        for(int j=0;j<alignDB.getNumSeqs();j++){
                                
-                               if (m->control_pressed) { outFile.close(); return 0;  }
+                               if (m->control_pressed) { delete distCalculator; outFile.close(); return 0;  }
                                
                                distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
                                double dist = distCalculator->getDist();
@@ -680,6 +728,7 @@ int DistanceCommand::driver(int startLine, int endLine, string dFileName, string
                m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
                
                outFile.close();
+               delete distCalculator;
                
                return 1;
        }
@@ -693,6 +742,28 @@ int DistanceCommand::driver(int startLine, int endLine, string dFileName, string
 /////// need to fix to work with calcs and sequencedb
 int DistanceCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, float cutoff){
        try {
+               
+               ValidCalculators validCalculator;
+               Dist* distCalculator;
+               if (m->isTrue(countends) == true) {
+                       for (int i=0; i<Estimators.size(); i++) {
+                               if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
+                                       if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
+                                       else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
+                                       else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
+                               }
+                       }
+               }else {
+                       for (int i=0; i<Estimators.size(); i++) {
+                               if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
+                                       if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
+                                       else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
+                                       else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
+                               }
+                       }
+               }
+               
+               
                MPI_Status status;
                int startTime = time(NULL);
                
@@ -702,7 +773,7 @@ int DistanceCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, flo
        
                        for(int j=0;j<i;j++){
                                
-                               if (m->control_pressed) {  return 0;  }
+                               if (m->control_pressed) {  delete distCalculator; return 0;  }
                                
                                //if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file
                                //the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop
@@ -735,7 +806,8 @@ int DistanceCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, flo
                }
                
                //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
-               cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl;                
+               cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl;        
+               delete distCalculator;
                return 1;
        }
        catch(exception& e) {
@@ -747,6 +819,27 @@ int DistanceCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, flo
 /////// need to fix to work with calcs and sequencedb
 int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size){
        try {
+               ValidCalculators validCalculator;
+               Dist* distCalculator;
+               if (m->isTrue(countends) == true) {
+                       for (int i=0; i<Estimators.size(); i++) {
+                               if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
+                                       if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
+                                       else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
+                                       else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
+                               }
+                       }
+               }else {
+                       for (int i=0; i<Estimators.size(); i++) {
+                               if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
+                                       if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
+                                       else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
+                                       else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
+                               }
+                       }
+               }
+               
+               
                MPI_Status status;
                
                MPI_File outMPI;
@@ -778,7 +871,7 @@ int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned
                        
                        for(int j=0;j<i;j++){
                                
-                               if (m->control_pressed) {  return 0;  }
+                               if (m->control_pressed) { delete distCalculator; return 0;  }
                                
                                distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
                                double dist = distCalculator->getDist();
@@ -809,6 +902,7 @@ int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned
                //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
                cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl;
                MPI_File_close(&outMPI);
+               delete distCalculator;
                
                return 1;
        }
@@ -821,6 +915,26 @@ int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned
 /////// need to fix to work with calcs and sequencedb
 int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size, string square){
        try {
+               ValidCalculators validCalculator;
+               Dist* distCalculator;
+               if (m->isTrue(countends) == true) {
+                       for (int i=0; i<Estimators.size(); i++) {
+                               if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
+                                       if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
+                                       else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
+                                       else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
+                               }
+                       }
+               }else {
+                       for (int i=0; i<Estimators.size(); i++) {
+                               if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
+                                       if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
+                                       else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
+                                       else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
+                               }
+                       }
+               }
+               
                MPI_Status status;
                
                MPI_File outMPI;
@@ -852,7 +966,7 @@ int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned
                        
                        for(int j=0;j<alignDB.getNumSeqs();j++){
                                
-                               if (m->control_pressed) {  return 0;  }
+                               if (m->control_pressed) { delete distCalculator; return 0;  }
                                
                                distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
                                double dist = distCalculator->getDist();
@@ -883,7 +997,7 @@ int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned
                //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
                cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl;
                MPI_File_close(&outMPI);
-               
+               delete distCalculator;
                return 1;
        }
        catch(exception& e) {
index bac5d14946a591a3a3990637dcf21f492c027ae3..9d5477d7ee1dca3d715189db8d61fab432c87cf3 100644 (file)
 #include "validcalculator.h"
 #include "dist.h"
 #include "sequencedb.h"
+#include "ignoregaps.h"
+#include "eachgapdist.h"
+#include "eachgapignore.h"
+#include "onegapdist.h"
+#include "onegapignore.h"
 
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+typedef struct distanceData {
+       int startLine;
+       int endLine;
+       string dFileName;
+       float cutoff;
+       SequenceDB alignDB;
+       vector<string> Estimators;
+       MothurOut* m;
+       string output;
+       int numNewFasta;
+       string countends;
+       
+       distanceData(){}
+       distanceData(int s, int e, string dbname, float c, SequenceDB db, vector<string> Est, MothurOut* mout, string o, int num, string count) {
+               startLine = s;
+               endLine = e;
+               dFileName = dbname;
+               cutoff = c;
+               alignDB = db;
+               Estimators = Est;
+               m = mout;
+               output = o;
+               numNewFasta = num;
+               countends = count;
+               
+       }
+};
 
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#else
+static DWORD WINAPI MyDistThreadFunction(LPVOID lpParam){ 
+       distanceData* pDataArray;
+       pDataArray = (distanceData*)lpParam;
+       
+       try {
+               ValidCalculators validCalculator;
+               Dist* distCalculator;
+               if (pDataArray->m->isTrue(pDataArray->countends) == true) {
+                       for (int i=0; i<pDataArray->Estimators.size(); i++) {
+                               if (validCalculator.isValidCalculator("distance", pDataArray->Estimators[i]) == true) { 
+                                       if (pDataArray->Estimators[i] == "nogaps")                      {       distCalculator = new ignoreGaps();      }
+                                       else if (pDataArray->Estimators[i] == "eachgap")        {       distCalculator = new eachGapDist();     }
+                                       else if (pDataArray->Estimators[i] == "onegap")         {       distCalculator = new oneGapDist();      }
+                               }
+                       }
+               }else {
+                       for (int i=0; i<pDataArray->Estimators.size(); i++) {
+                               if (validCalculator.isValidCalculator("distance", pDataArray->Estimators[i]) == true) { 
+                                       if (pDataArray->Estimators[i] == "nogaps")              {       distCalculator = new ignoreGaps();                                      }
+                                       else if (pDataArray->Estimators[i] == "eachgap"){       distCalculator = new eachGapIgnoreTermGapDist();        }
+                                       else if (pDataArray->Estimators[i] == "onegap") {       distCalculator = new oneGapIgnoreTermGapDist();         }
+                               }
+                       }
+               }
+               
+               int startTime = time(NULL);
+               
+               //column file
+               ofstream outFile(pDataArray->dFileName.c_str(), ios::trunc);
+               outFile.setf(ios::fixed, ios::showpoint);
+               outFile << setprecision(4);
+               
+               
+               if (pDataArray->output != "square") { 
+                       if((pDataArray->output == "lt") && (pDataArray->startLine == 0)){       outFile << pDataArray->alignDB.getNumSeqs() << endl;    }
+                       
+                       for(int i=pDataArray->startLine;i<pDataArray->endLine;i++){
+                               if(pDataArray->output == "lt")  {       
+                                       string name = pDataArray->alignDB.get(i).getName();
+                                       if (name.length() < 10) { //pad with spaces to make compatible
+                                               while (name.length() < 10) {  name += " ";  }
+                                       }
+                                       outFile << name << '\t';        
+                               }
+                               for(int j=0;j<i;j++){
+                                       
+                                       if (pDataArray->m->control_pressed) { delete distCalculator; outFile.close(); return 0;  }
+                                       
+                                       //if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file
+                                       //the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop
+                                       if ((i >= pDataArray->numNewFasta) && (j >= pDataArray->numNewFasta)) { break; }
+                                       
+                                       distCalculator->calcDist(pDataArray->alignDB.get(i), pDataArray->alignDB.get(j));
+                                       double dist = distCalculator->getDist();
+                                       
+                                       if(dist <= pDataArray->cutoff){
+                                               if (pDataArray->output == "column") { outFile << pDataArray->alignDB.get(i).getName() << ' ' << pDataArray->alignDB.get(j).getName() << ' ' << dist << endl; }
+                                       }
+                                       if (pDataArray->output == "lt") {  outFile << dist << '\t'; }
+                               }
+                               
+                               if (pDataArray->output == "lt") { outFile << endl; }
+                               
+                               if(i % 100 == 0){
+                                       pDataArray->m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
+                               }
+                               
+                       }
+                       pDataArray->m->mothurOut(toString(pDataArray->endLine-1) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
+               }else{
+                       if(pDataArray->startLine == 0){ outFile << pDataArray->alignDB.getNumSeqs() << endl;    }
+                       
+                       for(int i=pDataArray->startLine;i<pDataArray->endLine;i++){
+                               
+                               string name = pDataArray->alignDB.get(i).getName();
+                               //pad with spaces to make compatible
+                               if (name.length() < 10) { while (name.length() < 10) {  name += " ";  } }
+                               
+                               outFile << name << '\t';        
+                               
+                               for(int j=0;j<pDataArray->alignDB.getNumSeqs();j++){
+                                       
+                                       if (pDataArray->m->control_pressed) { delete distCalculator; outFile.close(); return 0;  }
+                                       
+                                       distCalculator->calcDist(pDataArray->alignDB.get(i), pDataArray->alignDB.get(j));
+                                       double dist = distCalculator->getDist();
+                                       
+                                       outFile << dist << '\t'; 
+                               }
+                               
+                               outFile << endl; 
+                               
+                               if(i % 100 == 0){
+                                       pDataArray->m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
+                               }
+                               
+                       }
+                       pDataArray->m->mothurOut(toString(pDataArray->endLine-1) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
+               }
+               
+               outFile.close();
+               delete distCalculator;
+               
+               return 0; 
+       }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "DistanceCommand", "MyDistThreadFunction");
+               exit(1);
+       }
+} 
+#endif
+
+/**************************************************************************************************/
 class DistanceCommand : public Command {
 
 public:
@@ -42,7 +193,7 @@ private:
                
        };
        
-       Dist* distCalculator;
+       //Dist* distCalculator;
        SequenceDB alignDB;
 
        string countends, output, fastafile, calc, outputDir, oldfastafile, column, compress;
@@ -74,5 +225,7 @@ private:
 
 #endif
 
+/**************************************************************************************************/
+
 
 
index 7942bfcce548030951e3c5157438d13eb88c17d6..1aaa41e91a3d15eb9d07bb73723ae7514c9b1216 100644 (file)
@@ -393,7 +393,7 @@ int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& th
                        //get set names
                        string setA = namesOfGroupCombos[c][0]; 
                        string setB = namesOfGroupCombos[c][1];
-                       
+               
                        //get filename
                        string outputFileName = outputDir +  m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
                        outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
@@ -423,7 +423,9 @@ int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& th
                                        setACount++;
                                }
                        }
-                                               
+                       
+                       //for (int i = 0; i < subset.size(); i++) { cout << subset[i]->getGroup() << endl; }
+                       
                        if ((setACount == 0) || (setBCount == 0))  { 
                                m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine(); 
                                outputNames.pop_back();
index 53074eb1e146f28af23a87e6888d57bfaa5184fa..f8ec32363516bcc6543569b57f313505796acef7 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -72,6 +72,8 @@
        #include <windows.h>
        #include <psapi.h>
        #include <direct.h>
+       #include <tchar.h>
+
 #endif
 
 using namespace std;
index 2fbf6d1ca616033f4fd52993412f0e720f8187af..ee8835386e1937bd8bc6d51ab2b0779f05f6a679 100644 (file)
@@ -8,7 +8,7 @@
  */
 
 #include "seqsummarycommand.h"
-#include "sequence.hpp"
+
 
 //**********************************************************************************************************************
 vector<string> SeqSummaryCommand::setParameters(){     
@@ -281,31 +281,30 @@ int SeqSummaryCommand::execute(){
                                
                                MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
 #else
-                       vector<unsigned long int> positions = m->divideFile(fastafile, processors);
-                               
-                       for (int i = 0; i < (positions.size()-1); i++) {
-                               lines.push_back(new linePair(positions[i], positions[(i+1)]));
-                       }       
-
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                               if(processors == 1){
-                                       numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile, lines[0]);
-                               }else{
-                                       numSeqs = createProcessesCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile); 
-                                       
-                                       rename((summaryFile + toString(processIDS[0]) + ".temp").c_str(), summaryFile.c_str());
-                                       //append files
-                                       for(int i=1;i<processors;i++){
-                                               m->appendFiles((summaryFile + toString(processIDS[i]) + ".temp"), summaryFile);
-                                               m->mothurRemove((summaryFile + toString(processIDS[i]) + ".temp"));
-                                       }
+                       vector<unsigned long int> positions; 
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                               positions = m->divideFile(fastafile, processors);
+                               for (int i = 0; i < (positions.size()-1); i++) {        lines.push_back(new linePair(positions[i], positions[(i+1)]));  }
+                       #else
+                               positions = m->setFilePosFasta(fastafile, numSeqs); 
+               
+                               //figure out how many sequences you have to process
+                               int numSeqsPerProcessor = numSeqs / processors;
+                               for (int i = 0; i < processors; i++) {
+                                       int startIndex =  i * numSeqsPerProcessor;
+                                       if(i == (processors - 1)){      numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;        }
+                                       lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor));
                                }
-                               
-                               if (m->control_pressed) {  return 0; }
-               #else
+                       #endif
+                       
+
+                       if(processors == 1){
                                numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile, lines[0]);
-                               if (m->control_pressed) {  return 0; }
-               #endif
+                       }else{
+                               numSeqs = createProcessesCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile); 
+                       }
+                       
+                       if (m->control_pressed) {  return 0; }
 #endif
                        
                #ifdef USE_MPI
@@ -507,11 +506,12 @@ int SeqSummaryCommand::MPICreateSummary(int start, int num, vector<int>& startPo
 /**************************************************************************************************/
 int SeqSummaryCommand::createProcessesCreateSummary(vector<int>& startPosition, vector<int>& endPosition, vector<int>& seqLength, vector<int>& ambigBases, vector<int>& longHomoPolymer, string filename, string sumFile) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
+               int process = 1;
                int num = 0;
                processIDS.clear();
                
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               
                //loop through and create all the processes you want
                while (process != processors) {
                        int pid = fork();
@@ -545,8 +545,11 @@ int SeqSummaryCommand::createProcessesCreateSummary(vector<int>& startPosition,
                        }
                }
                
+               //do your part
+               num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, sumFile, lines[0]);
+
                //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
+               for (int i=0;i<processIDS.size();i++) { 
                        int temp = processIDS[i];
                        wait(&temp);
                }
@@ -568,10 +571,54 @@ int SeqSummaryCommand::createProcessesCreateSummary(vector<int>& startPosition,
                                
                        in.close();
                        m->mothurRemove(tempFilename);
+                       
+                       m->appendFiles((sumFile + toString(processIDS[i]) + ".temp"), sumFile);
+                       m->mothurRemove((sumFile + toString(processIDS[i]) + ".temp"));
                }
                
-               return num;
+#else
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //Windows version shared memory, so be careful when passing variables through the seqSumData struct. 
+               //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+               //Taking advantage of shared memory to allow both threads to add info to vectors.
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               
+               vector<seqSumData*> pDataArray; 
+               DWORD   dwThreadIdArray[processors];
+               HANDLE  hThreadArray[processors]; 
+               
+               //Create processor worker threads.
+               for( int i=0; i<processors; i++ ){
+                       
+                       //cout << i << '\t' << lines[i]->start << '\t' << lines[i]->end << endl;
+                       // Allocate memory for thread data.
+                       seqSumData* tempSum = new seqSumData(&startPosition, &endPosition, &seqLength, &ambigBases, &longHomoPolymer, filename, (sumFile + toString(i) + ".temp"), m, lines[i]->start, lines[i]->end, namefile, nameMap);
+                       pDataArray.push_back(tempSum);
+                       processIDS.push_back(i);
+                               
+                       //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
+                       //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+                       hThreadArray[i] = CreateThread(NULL, 0, MySeqSumThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
+               }
+                       
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+                       num += pDataArray[i]->count;
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
+               
+               //rename((sumFile + toString(processIDS[0]) + ".temp").c_str(), sumFile.c_str());
+               //append files
+               for(int i=0;i<processIDS.size();i++){
+                       m->appendFiles((sumFile + toString(processIDS[i]) + ".temp"), sumFile);
+                       m->mothurRemove((sumFile + toString(processIDS[i]) + ".temp"));
+               }
 #endif         
+               return num;
        }
        catch(exception& e) {
                m->errorOut(e, "SeqSummaryCommand", "createProcessesCreateSummary");
index c0d368770cfa8df11560d2b89fee200231cb0dc3..a7ccb573490135c10c6fdb7258bb46228047d98b 100644 (file)
@@ -12,6 +12,9 @@
 
 #include "mothur.h"
 #include "command.hpp"
+#include "sequence.hpp"
+
+/**************************************************************************************************/
 
 class SeqSummaryCommand : public Command {
 public:
@@ -54,4 +57,118 @@ private:
 
 };
 
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+typedef struct seqSumData {
+       vector<int>* startPosition;
+       vector<int>* endPosition;
+       vector<int>* seqLength; 
+       vector<int>* ambigBases; 
+       vector<int>* longHomoPolymer; 
+       string filename; 
+       string sumFile; 
+       unsigned long int start;
+       unsigned long int end;
+       int count;
+       MothurOut* m;
+       string namefile;
+       map<string, int> nameMap;
+       
+       
+       seqSumData(){}
+       seqSumData(vector<int>* s, vector<int>* e, vector<int>* l, vector<int>* a, vector<int>* h, string f, string sf, MothurOut* mout, unsigned long int st, unsigned long int en, string na, map<string, int> nam) {
+               startPosition = s;
+               endPosition = e;
+               seqLength = l;
+               ambigBases = a;
+               longHomoPolymer = h;
+               filename = f;
+               sumFile = sf;
+               m = mout;
+               start = st;
+               end = en;
+               namefile = na;
+               nameMap = nam;
+               count = 0;
+       }
+};
+
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#else
+static DWORD WINAPI MySeqSumThreadFunction(LPVOID lpParam){ 
+       seqSumData* pDataArray;
+       pDataArray = (seqSumData*)lpParam;
+       
+       try {
+               ofstream outSummary;
+               pDataArray->m->openOutputFile(pDataArray->sumFile, outSummary);
+               
+               ifstream in;
+               pDataArray->m->openInputFile(pDataArray->filename, in);
+
+               //print header if you are process 0
+               if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
+                       outSummary << "seqname\tstart\tend\tnbases\tambigs\tpolymer\tnumSeqs" << endl;  
+                       in.seekg(0);
+               }else { //this accounts for the difference in line endings. 
+                       in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); 
+               }
+               
+               pDataArray->count = pDataArray->end;
+               for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
+                       
+                       if (pDataArray->m->control_pressed) { in.close(); outSummary.close(); pDataArray->count = 1; return 1; }
+                       
+                       Sequence current(in); pDataArray->m->gobble(in); 
+                       
+                       if (current.getName() != "") {
+                               
+                               int num = 1;
+                               if (pDataArray->namefile != "") {
+                                       //make sure this sequence is in the namefile, else error 
+                                       map<string, int>::iterator it = pDataArray->nameMap.find(current.getName());
+                                       
+                                       if (it == pDataArray->nameMap.end()) { pDataArray->m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; }
+                                       else { num = it->second; }
+                               }
+                               
+                               //for each sequence this sequence represents
+                               for (int i = 0; i < num; i++) {
+                                       pDataArray->startPosition->push_back(current.getStartPos());
+                                       pDataArray->endPosition->push_back(current.getEndPos());
+                                       pDataArray->seqLength->push_back(current.getNumBases());
+                                       pDataArray->ambigBases->push_back(current.getAmbigBases());
+                                       pDataArray->longHomoPolymer->push_back(current.getLongHomoPolymer());
+                               }
+                               
+                               outSummary << current.getName() << '\t';
+                               outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
+                               outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
+                               outSummary << current.getLongHomoPolymer() << '\t' << num << endl;
+                       }
+               }
+               
+               in.close();
+               outSummary.close();
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "SeqSummaryCommand", "MySeqSumThreadFunction");
+               exit(1);
+       }
+} 
 #endif
+
+
+
+
+#endif
+
+/**************************************************************************************************/
+
+