]> git.donarmstrong.com Git - mothur.git/commitdiff
paralellized pairwise.seqs for windows
authorSarah Westcott <mothur.westcott@gmail.com>
Tue, 21 Feb 2012 19:11:15 +0000 (14:11 -0500)
committerSarah Westcott <mothur.westcott@gmail.com>
Tue, 21 Feb 2012 19:11:15 +0000 (14:11 -0500)
blastalign.hpp
classifytreecommand.cpp
classifytreecommand.h
pairwiseseqscommand.cpp
pairwiseseqscommand.h

index 5b78da6b635770b53453cf9b6ec84493b17bf71a..31688bd86d77386d384fef73bb9c3289e681ada3 100644 (file)
@@ -1,3 +1,7 @@
+#ifndef BlastAlignment_H
+#define BlastAlignment_H
+
+
 /*
  *  blastalign.hpp
  *  
@@ -36,3 +40,7 @@ private:
        float gapExtend;
 };
 
+#endif
+
+
+
index e1ef6d304227f7b523ebb9540373a58cdd2339a0..9ec4e6f89a40d12a1661144f8835cd6e1a4ccc84 100644 (file)
@@ -33,7 +33,7 @@ vector<string> ClassifyTreeCommand::setParameters(){
 string ClassifyTreeCommand::getHelpString(){   
        try {
                string helpString = "";
-               helpString += "The classify.tree command reads a tree and taxonomy file and output the concensus taxonomy for each node on the tree. \n";
+               helpString += "The classify.tree command reads a tree and taxonomy file and output the consensus taxonomy for each node on the tree. \n";
                helpString += "If you provide a group file, the concensus for each group will also be provided. \n";
                helpString += "The new tree contains labels at each internal node.  The label is the node number so you can relate the tree to the summary file.\n";
                helpString += "The summary file lists the concensus taxonomy for the descendants of each node.\n";
index bd0e1ce4f084ccefae3fd8c088322dca1dc214b5..026e4bae414d227dab7750d623999b86d4aa4a73 100644 (file)
@@ -24,7 +24,7 @@ public:
        string getCommandCategory()             { return "Phylotype Analysis";          }
        string getHelpString(); 
        string getCitation() { return "http://www.mothur.org/wiki/Classify.tree"; }
-       string getDescription()         { return "Find the concensus taxonomy for the descendant of each tree node"; }
+       string getDescription()         { return "Find the consensus taxonomy for the descendant of each tree node"; }
     
        int execute();
        void help() { m->mothurOut(getHelpString()); }  
index 114f545aa26eb326f078b6ab7e1b18e024ecdcb5..ed11dfbdd56e0ff480bfd281b944dcaa1fab34eb 100644 (file)
@@ -8,19 +8,6 @@
  */
 
 #include "pairwiseseqscommand.h"
-#include "sequence.hpp"
-
-#include "gotohoverlap.hpp"
-#include "needlemanoverlap.hpp"
-#include "blastalign.hpp"
-#include "noalign.hpp"
-
-#include "ignoregaps.h"
-#include "eachgapdist.h"
-#include "eachgapignore.h"
-#include "onegapdist.h"
-#include "onegapignore.h"
-
 
 //**********************************************************************************************************************
 vector<string> PairwiseSeqsCommand::setParameters(){   
@@ -247,25 +234,6 @@ PairwiseSeqsCommand::PairwiseSeqsCommand(string option)  {
                                 if (calc == "default")  {  calc = "onegap";  }
                        }
                        m->splitAtDash(calc, Estimators);
-                       
-                       ValidCalculators validCalculator;
-                       if (countends) {
-                               for (int i=0; i<Estimators.size(); i++) {
-                                       if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
-                                               if (Estimators[i] == "nogaps")                  {       distCalculator = new ignoreGaps();      }
-                                               else if (Estimators[i] == "eachgap")    {       distCalculator = new eachGapDist();     }
-                                               else if (Estimators[i] == "onegap")             {       distCalculator = new oneGapDist();      }
-                                       }
-                               }
-                       }else {
-                               for (int i=0; i<Estimators.size(); i++) {
-                                       if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { 
-                                               if (Estimators[i] == "nogaps")          {       distCalculator = new ignoreGaps();                                      }
-                                               else if (Estimators[i] == "eachgap"){   distCalculator = new eachGapIgnoreTermGapDist();        }
-                                               else if (Estimators[i] == "onegap")     {       distCalculator = new oneGapIgnoreTermGapDist();         }
-                                       }
-                               }
-                       }
                }
                
        }
@@ -280,17 +248,7 @@ int PairwiseSeqsCommand::execute(){
        try {
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
-               int longestBase = 2000; //will need to update this in driver if we find sequences with more bases.  hardcoded so we don't have the pre-read user fasta file.
-               
-               if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
-               else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
-               else if(align == "blast")               {       alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);            }
-               else if(align == "noalign")             {       alignment = new NoAlign();                                                                                                      }
-               else {
-                       m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
-                       m->mothurOutEndLine();
-                       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
-               }
+               longestBase = 2000; //will need to update this in driver if we find sequences with more bases.  hardcoded so we don't have the pre-read user fasta file.
                
                cutoff += 0.005;
                
@@ -357,11 +315,11 @@ int PairwiseSeqsCommand::execute(){
                                        
                                        driverMPI(start, end, outMPI, cutoff); 
                                        
-                                       if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI);  m->mothurRemove(outputFile); delete distCalculator;  return 0; }
+                                       if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI);  m->mothurRemove(outputFile); return 0; }
                                
                                        //wait on chidren
                                        for(int i = 1; i < processors; i++) { 
-                                               if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   m->mothurRemove(outputFile); delete distCalculator;  return 0; }
+                                               if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   m->mothurRemove(outputFile);  return 0; }
                                                
                                                char buf[5];
                                                MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
@@ -370,7 +328,7 @@ int PairwiseSeqsCommand::execute(){
                                        //do your part
                                        driverMPI(start, end, outMPI, cutoff); 
                                        
-                                       if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);  m->mothurRemove(outputFile); delete distCalculator;  return 0; }
+                                       if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);  m->mothurRemove(outputFile);  return 0; }
                                
                                        char buf[5];
                                        strcpy(buf, "done"); 
@@ -390,7 +348,7 @@ int PairwiseSeqsCommand::execute(){
                                        if (output != "square"){ driverMPI(start, end, outputFile, mySize); }
                                        else { driverMPI(start, end, outputFile, mySize, output); }
                
-                                       if (m->control_pressed) {  outputTypes.clear();   m->mothurRemove(outputFile); delete distCalculator;  return 0; }
+                                       if (m->control_pressed) {  outputTypes.clear();   m->mothurRemove(outputFile);   return 0; }
                                        
                                        int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; //
                                        MPI_File outMPI;
@@ -405,7 +363,7 @@ int PairwiseSeqsCommand::execute(){
                                        for(int b = 1; b < processors; b++) { 
                                                unsigned long long fileSize;
                                                
-                                               if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);  m->mothurRemove(outputFile);  delete distCalculator;  return 0; }
+                                               if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);  m->mothurRemove(outputFile);   return 0; }
                                                
                                                MPI_Recv(&fileSize, 1, MPI_LONG, b, tag, MPI_COMM_WORLD, &status); 
                                                
@@ -444,7 +402,7 @@ int PairwiseSeqsCommand::execute(){
                        MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
        #else           
                                        
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                        //if you don't need to fork anything
                        if(processors == 1){
                                if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
@@ -465,14 +423,14 @@ int PairwiseSeqsCommand::execute(){
                                
                                createProcesses(outputFile); 
                        }
-               #else
+               //#else
                        //ifstream inFASTA;
-                       if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
-                       else { driver(0, numSeqs, outputFile, "square");  }
-               #endif
+                       //if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
+                       //else { driver(0, numSeqs, outputFile, "square");  }
+               //#endif
                
        #endif
-                       if (m->control_pressed) { outputTypes.clear();  delete distCalculator; m->mothurRemove(outputFile); return 0; }
+                       if (m->control_pressed) { outputTypes.clear();   m->mothurRemove(outputFile); return 0; }
                        
                        #ifdef USE_MPI
                                MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
@@ -500,10 +458,8 @@ int PairwiseSeqsCommand::execute(){
                        
                        m->mothurOut("It took " + toString(time(NULL) - startTime) + " to calculate the distances for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
                        
-                       if (m->control_pressed) { outputTypes.clear();  delete distCalculator; m->mothurRemove(outputFile); return 0; }
+                       if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFile); return 0; }
                }
-                       
-               delete distCalculator;
                
                //set phylip file as new current phylipfile
                string current = "";
@@ -535,9 +491,11 @@ int PairwiseSeqsCommand::execute(){
 /**************************************************************************************************/
 void PairwiseSeqsCommand::createProcesses(string filename) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 1;
+        int process = 1;
                processIDS.clear();
+        
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               
                
                //loop through and create all the processes you want
                while (process != processors) {
@@ -567,13 +525,51 @@ void PairwiseSeqsCommand::createProcesses(string filename) {
                        int temp = processIDS[i];
                        wait(&temp);
                }
+#else     
+        //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //Windows version shared memory, so be careful when passing variables through the distanceData struct. 
+               //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+               //that's why the distance calculator was moved inside of the driver to make separate copies.
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               
+               vector<pairwiseData*> pDataArray; //[processors-1];
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1]; 
+               
+               //Create processor-1 worker threads.
+               for( int i=0; i<processors-1; i++ ){
+                       string extension = toString(i) + ".temp";
+
+                       // Allocate memory for thread data.
+                       pairwiseData* tempDist = new pairwiseData((filename+extension), align, "square", Estimators[0], countends, output, alignDB, m, lines[i+1].start, lines[i+1].end, match, misMatch, gapOpen, gapExtend, longestBase, i);
+                       pDataArray.push_back(tempDist);
+                       processIDS.push_back(i);
+                       
+                       if (output != "square") { hThreadArray[i] = CreateThread(NULL, 0, MyPairwiseThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);  } 
+            else { hThreadArray[i] = CreateThread(NULL, 0, MyPairwiseSquareThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);  }
+               }
+               
+               //do your part
+               if (output != "square") {  driver(lines[0].start, lines[0].end, filename, cutoff); }
+               else { driver(lines[0].start, lines[0].end, filename, "square"); }
+               
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
                
-               //append and remove temp files
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
+
+#endif
+        
+        //append and remove temp files
                for (int i=0;i<processIDS.size();i++) { 
                        m->appendFiles((filename + toString(processIDS[i]) + ".temp"), filename);
                        m->mothurRemove((filename + toString(processIDS[i]) + ".temp"));
                }
-#endif
+        
        }
        catch(exception& e) {
                m->errorOut(e, "PairwiseSeqsCommand", "createProcesses");
@@ -587,6 +583,33 @@ int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, fl
        try {
 
                int startTime = time(NULL);
+        
+        Alignment* alignment;
+        if(align == "gotoh")                   {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
+               else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
+               else if(align == "blast")               {       alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);            }
+               else if(align == "noalign")             {       alignment = new NoAlign();                                                                                                      }
+               else {
+                       m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
+                       m->mothurOutEndLine();
+                       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
+               }
+        
+        ValidCalculators validCalculator;
+        Dist* distCalculator;
+        if (countends) {
+            if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { 
+                if (Estimators[0] == "nogaps")                 {       distCalculator = new ignoreGaps();      }
+                else if (Estimators[0] == "eachgap")   {       distCalculator = new eachGapDist();     }
+                else if (Estimators[0] == "onegap")            {       distCalculator = new oneGapDist();      }
+            }
+        }else {
+            if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { 
+                if (Estimators[0] == "nogaps")         {       distCalculator = new ignoreGaps();                                      }
+                else if (Estimators[0] == "eachgap"){  distCalculator = new eachGapIgnoreTermGapDist();        }
+                else if (Estimators[0] == "onegap")    {       distCalculator = new oneGapIgnoreTermGapDist();         }
+            }
+        }
                
                //column file
                ofstream outFile(dFileName.c_str(), ios::trunc);
@@ -606,7 +629,7 @@ int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, fl
                        
                        for(int j=0;j<i;j++){
                                
-                               if (m->control_pressed) { outFile.close(); return 0;  }
+                               if (m->control_pressed) { outFile.close(); delete alignment; delete distCalculator; return 0;  }
                                
                                if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
                                        alignment->resize(alignDB.get(i).getUnaligned().length()+1);
@@ -643,6 +666,8 @@ int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, fl
                m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
                
                outFile.close();
+        delete alignment;
+        delete distCalculator;
                
                return 1;
        }
@@ -657,7 +682,34 @@ int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, st
        try {
 
                int startTime = time(NULL);
+        
+        Alignment* alignment;
+        if(align == "gotoh")                   {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
+               else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
+               else if(align == "blast")               {       alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);            }
+               else if(align == "noalign")             {       alignment = new NoAlign();                                                                                                      }
+               else {
+                       m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
+                       m->mothurOutEndLine();
+                       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
+               }
                
+        ValidCalculators validCalculator;
+        Dist* distCalculator;
+        if (countends) {
+            if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { 
+                if (Estimators[0] == "nogaps")                 {       distCalculator = new ignoreGaps();      }
+                else if (Estimators[0] == "eachgap")   {       distCalculator = new eachGapDist();     }
+                else if (Estimators[0] == "onegap")            {       distCalculator = new oneGapDist();      }
+            }
+        }else {
+            if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { 
+                if (Estimators[0] == "nogaps")         {       distCalculator = new ignoreGaps();                                      }
+                else if (Estimators[0] == "eachgap"){  distCalculator = new eachGapIgnoreTermGapDist();        }
+                else if (Estimators[0] == "onegap")    {       distCalculator = new oneGapIgnoreTermGapDist();         }
+            }
+        }
+
                //column file
                ofstream outFile(dFileName.c_str(), ios::trunc);
                outFile.setf(ios::fixed, ios::showpoint);
@@ -675,7 +727,7 @@ int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, st
                        
                        for(int j=0;j<alignDB.getNumSeqs();j++){
                                
-                               if (m->control_pressed) { outFile.close(); return 0;  }
+                               if (m->control_pressed) { outFile.close(); delete alignment; delete distCalculator; return 0;  }
                                
                                if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
                                        alignment->resize(alignDB.get(i).getUnaligned().length()+1);
@@ -708,6 +760,8 @@ int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, st
                m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine();
                
                outFile.close();
+        delete alignment;
+        delete distCalculator;
                
                return 1;
        }
@@ -723,14 +777,41 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI,
        try {
                MPI_Status status;
                int startTime = time(NULL);
+        
+        Alignment* alignment;
+        if(align == "gotoh")                   {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
+               else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
+               else if(align == "blast")               {       alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);            }
+               else if(align == "noalign")             {       alignment = new NoAlign();                                                                                                      }
+               else {
+                       m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
+                       m->mothurOutEndLine();
+                       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
+               }
                
+        ValidCalculators validCalculator;
+        Dist* distCalculator;
+        if (countends) {
+            if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { 
+                if (Estimators[0] == "nogaps")                 {       distCalculator = new ignoreGaps();      }
+                else if (Estimators[0] == "eachgap")   {       distCalculator = new eachGapDist();     }
+                else if (Estimators[0] == "onegap")            {       distCalculator = new oneGapDist();      }
+            }
+        }else {
+            if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { 
+                if (Estimators[0] == "nogaps")         {       distCalculator = new ignoreGaps();                                      }
+                else if (Estimators[0] == "eachgap"){  distCalculator = new eachGapIgnoreTermGapDist();        }
+                else if (Estimators[0] == "onegap")    {       distCalculator = new oneGapIgnoreTermGapDist();         }
+            }
+        }
+
                string outputString = "";
                
                for(int i=startLine;i<endLine;i++){
        
                        for(int j=0;j<i;j++){
                                
-                               if (m->control_pressed) {  return 0;  }
+                               if (m->control_pressed) { delete alignment; delete distCalculator; return 0;  }
                                
                                if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
                                        alignment->resize(alignDB.get(i).getUnaligned().length()+1);
@@ -772,7 +853,8 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI,
                        delete buf;
                        
                }
-               
+               delete alignment;
+        delete distCalculator;
                return 1;
        }
        catch(exception& e) {
@@ -794,7 +876,33 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsi
 
                MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
 
-               
+               Alignment* alignment;
+        if(align == "gotoh")                   {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
+               else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
+               else if(align == "blast")               {       alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);            }
+               else if(align == "noalign")             {       alignment = new NoAlign();                                                                                                      }
+               else {
+                       m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
+                       m->mothurOutEndLine();
+                       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
+               }
+        
+        ValidCalculators validCalculator;
+        Dist* distCalculator;
+        if (countends) {
+            if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { 
+                if (Estimators[0] == "nogaps")                 {       distCalculator = new ignoreGaps();      }
+                else if (Estimators[0] == "eachgap")   {       distCalculator = new eachGapDist();     }
+                else if (Estimators[0] == "onegap")            {       distCalculator = new oneGapDist();      }
+            }
+        }else {
+            if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { 
+                if (Estimators[0] == "nogaps")         {       distCalculator = new ignoreGaps();                                      }
+                else if (Estimators[0] == "eachgap"){  distCalculator = new eachGapIgnoreTermGapDist();        }
+                else if (Estimators[0] == "onegap")    {       distCalculator = new oneGapIgnoreTermGapDist();         }
+            }
+        }
+
                
                string outputString = "";
                size = 0;
@@ -811,7 +919,7 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsi
                        
                        for(int j=0;j<i;j++){
                                
-                               if (m->control_pressed) {  return 0;  }
+                               if (m->control_pressed) { delete alignment; delete distCalculator; return 0;  }
                                
                                if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
                                        alignment->resize(alignDB.get(i).getUnaligned().length()+1);
@@ -848,6 +956,8 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsi
                }
                
                MPI_File_close(&outMPI);
+        delete alignment;
+        delete distCalculator;
                
                return 1;
        }
@@ -870,7 +980,16 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsi
 
                MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
                
-               
+               Alignment* alignment;
+        if(align == "gotoh")                   {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
+               else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
+               else if(align == "blast")               {       alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);            }
+               else if(align == "noalign")             {       alignment = new NoAlign();                                                                                                      }
+               else {
+                       m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
+                       m->mothurOutEndLine();
+                       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
+               }
                
                string outputString = "";
                size = 0;
@@ -887,7 +1006,7 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsi
                        
                        for(int j=0;j<alignDB.getNumSeqs();j++){
                                
-                               if (m->control_pressed) {  return 0;  }
+                               if (m->control_pressed) {  delete alignment; return 0;  }
                                
                                if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
                                        alignment->resize(alignDB.get(i).getUnaligned().length()+1);
@@ -925,6 +1044,7 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsi
                
                MPI_File_close(&outMPI);
                
+        delete alignment;
                return 1;
        }
        catch(exception& e) {
index a3a91f7bcd1f4ad2bbd52024d36153399ebc1414..ba82f47dfc7025481d3a6b7763a525aff690cece 100644 (file)
 #include "validcalculator.h"
 #include "dist.h"
 #include "sequencedb.h"
+#include "sequence.hpp"
+
+#include "gotohoverlap.hpp"
+#include "needlemanoverlap.hpp"
+#include "blastalign.hpp"
+#include "noalign.hpp"
+
+#include "ignoregaps.h"
+#include "eachgapdist.h"
+#include "eachgapignore.h"
+#include "onegapdist.h"
+#include "onegapignore.h"
 
 class PairwiseSeqsCommand : public Command {
        
@@ -44,8 +56,6 @@ private:
        vector<int> processIDS;   //end line, processid
        vector<distlinePair> lines;
        
-       Alignment* alignment;
-       Dist* distCalculator;
        SequenceDB alignDB;
        
        void createProcesses(string);
@@ -60,12 +70,254 @@ private:
        
        string fastaFileName, align, calc, outputDir, output;
        float match, misMatch, gapOpen, gapExtend, cutoff;
-       int processors;
+       int processors, longestBase;
        vector<string> fastaFileNames, Estimators;
        vector<string> outputNames;
        
        bool abort, countends, compress;
 };
 
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+struct pairwiseData {
+    string outputFileName;
+       string align, square, distcalcType, output;
+       unsigned long long start;
+       unsigned long long end;
+       MothurOut* m;
+       float match, misMatch, gapOpen, gapExtend, cutoff;
+       int count, threadID, longestBase;
+    bool countends;
+    SequenceDB alignDB;
+       
+       pairwiseData(){}
+       pairwiseData(string ofn, string al, string sq, string di, bool co, string op, SequenceDB DB, MothurOut* mout, unsigned long long st, unsigned long long en, float ma, float misMa, float gapO, float gapE, int thr, int tid) {
+               outputFileName = ofn;
+               m = mout;
+               start = st;
+               end = en;
+               match = ma; 
+               misMatch = misMa;
+               gapOpen = gapO; 
+               gapExtend = gapE; 
+               longestBase = thr;
+               align = al;
+        square = sq;
+        distcalcType = di;
+        countends = co;
+        alignDB = DB;
+               count = 0;
+        output = op;
+               threadID = tid;
+       }
+};
+
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#else
+static DWORD WINAPI MyPairwiseSquareThreadFunction(LPVOID lpParam){ 
+       pairwiseData* pDataArray;
+       pDataArray = (pairwiseData*)lpParam;
+       
+       try {
+               ofstream outFile((pDataArray->outputFileName).c_str(), ios::trunc);
+               outFile.setf(ios::fixed, ios::showpoint);
+               outFile << setprecision(4);
+               
+               pDataArray->count = pDataArray->end;
+        
+        int startTime = time(NULL);
+        
+        Alignment* alignment;
+        if(pDataArray->align == "gotoh")                       {       alignment = new GotohOverlap(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);                     }
+               else if(pDataArray->align == "needleman")       {       alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);                                }
+               else if(pDataArray->align == "blast")           {       alignment = new BlastAlignment(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch);            }
+               else if(pDataArray->align == "noalign")         {       alignment = new NoAlign();                                                                                                      }
+               else {
+                       pDataArray->m->mothurOut(pDataArray->align + " is not a valid alignment option. I will run the command using needleman.");
+                       pDataArray->m->mothurOutEndLine();
+                       alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);
+               }
+               
+        ValidCalculators validCalculator;
+        Dist* distCalculator;
+        if (pDataArray->countends) {
+            if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) { 
+                if (pDataArray->distcalcType == "nogaps")                      {       distCalculator = new ignoreGaps();      }
+                else if (pDataArray->distcalcType == "eachgap")        {       distCalculator = new eachGapDist();     }
+                else if (pDataArray->distcalcType == "onegap")         {       distCalculator = new oneGapDist();      }
+            }
+        }else {
+            if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) { 
+                if (pDataArray->distcalcType == "nogaps")              {       distCalculator = new ignoreGaps();                                      }
+                else if (pDataArray->distcalcType == "eachgap"){       distCalculator = new eachGapIgnoreTermGapDist();        }
+                else if (pDataArray->distcalcType == "onegap") {       distCalculator = new oneGapIgnoreTermGapDist();         }
+            }
+        }
+
+        if(pDataArray->start == 0){    outFile << pDataArray->alignDB.getNumSeqs() << endl;    }
+               
+               for(int i=pDataArray->start;i<pDataArray->end;i++){
+            
+                       string name = pDataArray->alignDB.get(i).getName();
+                       //pad with spaces to make compatible
+                       if (name.length() < 10) { while (name.length() < 10) {  name += " ";  } }
+            
+                       outFile << name << '\t';        
+                       
+                       for(int j=0;j<pDataArray->alignDB.getNumSeqs();j++){
+                               
+                               if (pDataArray->m->control_pressed) { outFile.close(); delete alignment; delete distCalculator; return 0;  }
+                               
+                               if (pDataArray->alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
+                                       alignment->resize(pDataArray->alignDB.get(i).getUnaligned().length()+1);
+                               }
+                               
+                               if (pDataArray->alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
+                                       alignment->resize(pDataArray->alignDB.get(j).getUnaligned().length()+1);
+                               }
+                               
+                               Sequence seqI(pDataArray->alignDB.get(i).getName(), pDataArray->alignDB.get(i).getAligned());
+                               Sequence seqJ(pDataArray->alignDB.get(j).getName(), pDataArray->alignDB.get(j).getAligned());
+                               
+                               alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
+                               seqI.setAligned(alignment->getSeqAAln());
+                               seqJ.setAligned(alignment->getSeqBAln());
+                               
+                               distCalculator->calcDist(seqI, seqJ);
+                               double dist = distCalculator->getDist();
+                
+                               outFile << dist << '\t'; 
+                       }
+                       
+                       outFile << endl; 
+                       
+                       if(i % 100 == 0){
+                               pDataArray->m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
+                       }
+                       
+               }
+               pDataArray->m->mothurOut(toString(pDataArray->end-1) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
+               
+               outFile.close();
+        delete alignment;
+        delete distCalculator;
+
+        
+    }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "PairwiseSeqsCommand", "MyPairwiseSquareThreadFunction");
+               exit(1);
+       }
+} 
+
+/**************************************************************************************************/
+static DWORD WINAPI MyPairwiseThreadFunction(LPVOID lpParam){ 
+       pairwiseData* pDataArray;
+       pDataArray = (pairwiseData*)lpParam;
+       
+       try {
+               ofstream outFile((pDataArray->outputFileName).c_str(), ios::trunc);
+               outFile.setf(ios::fixed, ios::showpoint);
+               outFile << setprecision(4);
+               
+        pDataArray->count = pDataArray->end;
+        
+        int startTime = time(NULL);
+        
+        Alignment* alignment;
+        if(pDataArray->align == "gotoh")                       {       alignment = new GotohOverlap(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);                     }
+               else if(pDataArray->align == "needleman")       {       alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);                                }
+               else if(pDataArray->align == "blast")           {       alignment = new BlastAlignment(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch);            }
+               else if(pDataArray->align == "noalign")         {       alignment = new NoAlign();                                                                                                      }
+               else {
+                       pDataArray->m->mothurOut(pDataArray->align + " is not a valid alignment option. I will run the command using needleman.");
+                       pDataArray->m->mothurOutEndLine();
+                       alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);
+               }
+               
+        ValidCalculators validCalculator;
+        Dist* distCalculator;
+        if (pDataArray->countends) {
+            if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) { 
+                if (pDataArray->distcalcType == "nogaps")                      {       distCalculator = new ignoreGaps();      }
+                else if (pDataArray->distcalcType == "eachgap")        {       distCalculator = new eachGapDist();     }
+                else if (pDataArray->distcalcType == "onegap")         {       distCalculator = new oneGapDist();      }
+            }
+        }else {
+            if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) { 
+                if (pDataArray->distcalcType == "nogaps")              {       distCalculator = new ignoreGaps();                                      }
+                else if (pDataArray->distcalcType == "eachgap"){       distCalculator = new eachGapIgnoreTermGapDist();        }
+                else if (pDataArray->distcalcType == "onegap") {       distCalculator = new oneGapIgnoreTermGapDist();         }
+            }
+        }
+        
+        if((pDataArray->output == "lt") && pDataArray->start == 0){    outFile << pDataArray->alignDB.getNumSeqs() << endl;    }
+               
+               for(int i=pDataArray->start;i<pDataArray->end;i++){
+            
+                       if(pDataArray->output == "lt")  {       
+                               string name = pDataArray->alignDB.get(i).getName();
+                               if (name.length() < 10) { //pad with spaces to make compatible
+                                       while (name.length() < 10) {  name += " ";  }
+                               }
+                               outFile << name << '\t';        
+                       }
+
+                       
+                       for(int j=0;j<i;j++){
+                               
+                               if (pDataArray->m->control_pressed) { outFile.close(); delete alignment; delete distCalculator; return 0;  }
+                               
+                               if (pDataArray->alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
+                                       alignment->resize(pDataArray->alignDB.get(i).getUnaligned().length()+1);
+                               }
+                               
+                               if (pDataArray->alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
+                                       alignment->resize(pDataArray->alignDB.get(j).getUnaligned().length()+1);
+                               }
+                               
+                               Sequence seqI(pDataArray->alignDB.get(i).getName(), pDataArray->alignDB.get(i).getAligned());
+                               Sequence seqJ(pDataArray->alignDB.get(j).getName(), pDataArray->alignDB.get(j).getAligned());
+                               
+                               alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
+                               seqI.setAligned(alignment->getSeqAAln());
+                               seqJ.setAligned(alignment->getSeqBAln());
+                               
+                               distCalculator->calcDist(seqI, seqJ);
+                               double dist = distCalculator->getDist();
+                
+                               if(dist <= pDataArray->cutoff){
+                                       if (pDataArray->output == "column") { outFile << pDataArray->alignDB.get(i).getName() << ' ' << pDataArray->alignDB.get(j).getName() << ' ' << dist << endl; }
+                               }
+                               if (pDataArray->output == "lt") {  outFile << dist << '\t'; }
+                       }
+                       
+                       if (pDataArray->output == "lt") { outFile << endl; }
+                       
+                       if(i % 100 == 0){
+                               pDataArray->m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
+                       }
+                       
+               }
+               pDataArray->m->mothurOut(toString(pDataArray->end-1) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine();
+               
+               outFile.close();
+        delete alignment;
+        delete distCalculator;
+        
+        
+    }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "PairwiseSeqsCommand", "MyPairwiseThreadFunction");
+               exit(1);
+       }
+} 
+
+#endif
+
+
 #endif