]> git.donarmstrong.com Git - mothur.git/blobdiff - pairwiseseqscommand.h
changes while testing
[mothur.git] / pairwiseseqscommand.h
index afb8e19dc476050139de07964c7697626fef3fd7..90c21b25a7c35c47834ea7311f3e7f969163fc98 100644 (file)
 #include "validcalculator.h"
 #include "dist.h"
 #include "sequencedb.h"
+#include "sequence.hpp"
+
+#include "gotohoverlap.hpp"
+#include "needlemanoverlap.hpp"
+#include "blastalign.hpp"
+#include "noalign.hpp"
+
+#include "ignoregaps.h"
+#include "eachgapdist.h"
+#include "eachgapignore.h"
+#include "onegapdist.h"
+#include "onegapignore.h"
 
 class PairwiseSeqsCommand : public Command {
        
@@ -28,7 +40,9 @@ public:
        vector<string> setParameters();
        string getCommandName()                 { return "pairwise.seqs";               }
        string getCommandCategory()             { return "Sequence Processing"; }
+       
        string getHelpString(); 
+    string getOutputPattern(string);   
        string getCitation() { return "Needleman SB, Wunsch CD (1970). A general method applicable to the search for similarities in the amino acid sequence of two proteins. J Mol Biol 48: 443-53. [ for needleman ]\nGotoh O (1982). An improved algorithm for matching biological sequences. J Mol Biol 162: 705-8. [ for gotoh ] \nhttp://www.mothur.org/wiki/Pairwise.seqs"; }
        string getDescription()         { return "calculates pairwise distances from an unaligned fasta file"; }
 
@@ -44,8 +58,6 @@ private:
        vector<int> processIDS;   //end line, processid
        vector<distlinePair> lines;
        
-       Alignment* alignment;
-       Dist* distCalculator;
        SequenceDB alignDB;
        
        void createProcesses(string);
@@ -54,18 +66,266 @@ private:
        
        #ifdef USE_MPI 
        int driverMPI(int, int, MPI_File&, float);
-       int driverMPI(int, int, string, unsigned long int&);
-       int driverMPI(int, int, string, unsigned long int&, string);
+       int driverMPI(int, int, string, unsigned long long&);
+       int driverMPI(int, int, string, unsigned long long&, string);
        #endif
        
        string fastaFileName, align, calc, outputDir, output;
        float match, misMatch, gapOpen, gapExtend, cutoff;
-       int processors;
+       int processors, longestBase;
        vector<string> fastaFileNames, Estimators;
        vector<string> outputNames;
        
        bool abort, countends, compress;
 };
 
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+struct pairwiseData {
+    string outputFileName;
+       string align, square, distcalcType, output;
+       unsigned long long start;
+       unsigned long long end;
+       MothurOut* m;
+       float match, misMatch, gapOpen, gapExtend, cutoff;
+       int count, threadID, longestBase;
+    bool countends;
+    SequenceDB alignDB;
+       
+       pairwiseData(){}
+       pairwiseData(string ofn, string al, string sq, string di, bool co, string op, SequenceDB DB, MothurOut* mout, unsigned long long st, unsigned long long en, float ma, float misMa, float gapO, float gapE, int thr, float cu, int tid) {
+               outputFileName = ofn;
+               m = mout;
+               start = st;
+               end = en;
+               match = ma; 
+               misMatch = misMa;
+               gapOpen = gapO; 
+               gapExtend = gapE; 
+               longestBase = thr;
+               align = al;
+        square = sq;
+        distcalcType = di;
+        countends = co;
+        alignDB = DB;
+               count = 0;
+        output = op;
+        cutoff = cu;
+               threadID = tid;
+       }
+};
+
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MyPairwiseSquareThreadFunction(LPVOID lpParam){ 
+       pairwiseData* pDataArray;
+       pDataArray = (pairwiseData*)lpParam;
+       
+       try {
+               ofstream outFile((pDataArray->outputFileName).c_str(), ios::trunc);
+               outFile.setf(ios::fixed, ios::showpoint);
+               outFile << setprecision(4);
+               
+               pDataArray->count = 0;
+        
+        int startTime = time(NULL);
+        
+        Alignment* alignment;
+        if(pDataArray->align == "gotoh")                       {       alignment = new GotohOverlap(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);                     }
+               else if(pDataArray->align == "needleman")       {       alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);                                }
+               else if(pDataArray->align == "blast")           {       alignment = new BlastAlignment(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch);            }
+               else if(pDataArray->align == "noalign")         {       alignment = new NoAlign();                                                                                                      }
+               else {
+                       pDataArray->m->mothurOut(pDataArray->align + " is not a valid alignment option. I will run the command using needleman.");
+                       pDataArray->m->mothurOutEndLine();
+                       alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);
+               }
+               
+        ValidCalculators validCalculator;
+        Dist* distCalculator;
+        if (pDataArray->countends) {
+            if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) { 
+                if (pDataArray->distcalcType == "nogaps")                      {       distCalculator = new ignoreGaps();      }
+                else if (pDataArray->distcalcType == "eachgap")        {       distCalculator = new eachGapDist();     }
+                else if (pDataArray->distcalcType == "onegap")         {       distCalculator = new oneGapDist();      }
+            }
+        }else {
+            if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) { 
+                if (pDataArray->distcalcType == "nogaps")              {       distCalculator = new ignoreGaps();                                      }
+                else if (pDataArray->distcalcType == "eachgap"){       distCalculator = new eachGapIgnoreTermGapDist();        }
+                else if (pDataArray->distcalcType == "onegap") {       distCalculator = new oneGapIgnoreTermGapDist();         }
+            }
+        }
+
+        if(pDataArray->start == 0){    outFile << pDataArray->alignDB.getNumSeqs() << endl;    }
+               
+               for(int i=pDataArray->start;i<pDataArray->end;i++){
+            pDataArray->count++;
+            
+                       string name = pDataArray->alignDB.get(i).getName();
+                       //pad with spaces to make compatible
+                       if (name.length() < 10) { while (name.length() < 10) {  name += " ";  } }
+            
+                       outFile << name << '\t';        
+                       
+                       for(int j=0;j<pDataArray->alignDB.getNumSeqs();j++){
+                               
+                               if (pDataArray->m->control_pressed) { outFile.close(); delete alignment; delete distCalculator; return 0;  }
+                               
+                               if (pDataArray->alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
+                                       alignment->resize(pDataArray->alignDB.get(i).getUnaligned().length()+1);
+                               }
+                               
+                               if (pDataArray->alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
+                                       alignment->resize(pDataArray->alignDB.get(j).getUnaligned().length()+1);
+                               }
+                               
+                               Sequence seqI(pDataArray->alignDB.get(i).getName(), pDataArray->alignDB.get(i).getAligned());
+                               Sequence seqJ(pDataArray->alignDB.get(j).getName(), pDataArray->alignDB.get(j).getAligned());
+                               
+                               alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
+                               seqI.setAligned(alignment->getSeqAAln());
+                               seqJ.setAligned(alignment->getSeqBAln());
+                               
+                               distCalculator->calcDist(seqI, seqJ);
+                               double dist = distCalculator->getDist();
+                
+                if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: " + seqI.getName() + '\t' +  alignment->getSeqAAln() + '\n' + seqJ.getName() + alignment->getSeqBAln() + '\n' + "distance = " + toString(dist) + "\n"); }
+                
+                               outFile << dist << '\t'; 
+                       }
+                       
+                       outFile << endl; 
+                       
+                       if(i % 100 == 0){
+                               pDataArray->m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n"); 
+                       }
+                       
+               }
+               pDataArray->m->mothurOutJustToScreen(toString(pDataArray->count) + "\t" + toString(time(NULL) - startTime)+"\n");
+               
+               outFile.close();
+        delete alignment;
+        delete distCalculator;
+
+        
+    }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "PairwiseSeqsCommand", "MyPairwiseSquareThreadFunction");
+               exit(1);
+       }
+} 
+
+/**************************************************************************************************/
+static DWORD WINAPI MyPairwiseThreadFunction(LPVOID lpParam){ 
+       pairwiseData* pDataArray;
+       pDataArray = (pairwiseData*)lpParam;
+       
+       try {
+               ofstream outFile((pDataArray->outputFileName).c_str(), ios::trunc);
+               outFile.setf(ios::fixed, ios::showpoint);
+               outFile << setprecision(4);
+        
+        int startTime = time(NULL);
+        
+        Alignment* alignment;
+        if(pDataArray->align == "gotoh")                       {       alignment = new GotohOverlap(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);                     }
+               else if(pDataArray->align == "needleman")       {       alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);                                }
+               else if(pDataArray->align == "blast")           {       alignment = new BlastAlignment(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch);            }
+               else if(pDataArray->align == "noalign")         {       alignment = new NoAlign();                                                                                                      }
+               else {
+                       pDataArray->m->mothurOut(pDataArray->align + " is not a valid alignment option. I will run the command using needleman.");
+                       pDataArray->m->mothurOutEndLine();
+                       alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase);
+               }
+               
+        ValidCalculators validCalculator;
+        Dist* distCalculator;
+        if (pDataArray->countends) {
+            if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) { 
+                if (pDataArray->distcalcType == "nogaps")                      {       distCalculator = new ignoreGaps();      }
+                else if (pDataArray->distcalcType == "eachgap")        {       distCalculator = new eachGapDist();     }
+                else if (pDataArray->distcalcType == "onegap")         {       distCalculator = new oneGapDist();      }
+            }
+        }else {
+            if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) { 
+                if (pDataArray->distcalcType == "nogaps")              {       distCalculator = new ignoreGaps();                                      }
+                else if (pDataArray->distcalcType == "eachgap"){       distCalculator = new eachGapIgnoreTermGapDist();        }
+                else if (pDataArray->distcalcType == "onegap") {       distCalculator = new oneGapIgnoreTermGapDist();         }
+            }
+        }
+        
+        if((pDataArray->output == "lt") && pDataArray->start == 0){    outFile << pDataArray->alignDB.getNumSeqs() << endl;    }
+               
+        pDataArray->count = 0;
+               for(int i=pDataArray->start;i<pDataArray->end;i++){
+            pDataArray->count++;
+            
+                       if(pDataArray->output == "lt")  {       
+                               string name = pDataArray->alignDB.get(i).getName();
+                               if (name.length() < 10) { //pad with spaces to make compatible
+                                       while (name.length() < 10) {  name += " ";  }
+                               }
+                               outFile << name << '\t';        
+                       }
+
+                       
+                       for(int j=0;j<i;j++){
+                               
+                               if (pDataArray->m->control_pressed) { outFile.close(); delete alignment; delete distCalculator; return 0;  }
+                               
+                               if (pDataArray->alignDB.get(i).getUnaligned().length() > alignment->getnRows()) {
+                                       alignment->resize(pDataArray->alignDB.get(i).getUnaligned().length()+1);
+                               }
+                               
+                               if (pDataArray->alignDB.get(j).getUnaligned().length() > alignment->getnRows()) {
+                                       alignment->resize(pDataArray->alignDB.get(j).getUnaligned().length()+1);
+                               }
+                               
+                               Sequence seqI(pDataArray->alignDB.get(i).getName(), pDataArray->alignDB.get(i).getAligned());
+                               Sequence seqJ(pDataArray->alignDB.get(j).getName(), pDataArray->alignDB.get(j).getAligned());
+                               
+                               alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
+                               seqI.setAligned(alignment->getSeqAAln());
+                               seqJ.setAligned(alignment->getSeqBAln());
+                               
+                               distCalculator->calcDist(seqI, seqJ);
+                               double dist = distCalculator->getDist();
+                
+                if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: " + seqI.getName() + '\t' +  alignment->getSeqAAln() + '\n' + seqJ.getName() + alignment->getSeqBAln() + '\n' + "distance = " + toString(dist) + "\n"); }
+                
+                               if(dist <= pDataArray->cutoff){
+                                       if (pDataArray->output == "column") { outFile << pDataArray->alignDB.get(i).getName() << ' ' << pDataArray->alignDB.get(j).getName() << ' ' << dist << endl; }
+                               }
+                               if (pDataArray->output == "lt") {  outFile << dist << '\t'; }
+                       }
+                       
+                       if (pDataArray->output == "lt") { outFile << endl; }
+                       
+                       if(i % 100 == 0){
+                               pDataArray->m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n"); 
+                       }
+                       
+               }
+               pDataArray->m->mothurOutJustToScreen(toString(pDataArray->end-1) + "\t" + toString(time(NULL) - startTime)+"\n");
+               
+               outFile.close();
+        delete alignment;
+        delete distCalculator;
+        
+        
+    }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "PairwiseSeqsCommand", "MyPairwiseThreadFunction");
+               exit(1);
+       }
+} 
+
+#endif
+
+
 #endif