From a78fa674631a7d8a8d4e5043384ee244ed65cc09 Mon Sep 17 00:00:00 2001 From: Sarah Westcott Date: Tue, 21 Feb 2012 14:11:15 -0500 Subject: [PATCH] paralellized pairwise.seqs for windows --- blastalign.hpp | 8 ++ classifytreecommand.cpp | 2 +- classifytreecommand.h | 2 +- pairwiseseqscommand.cpp | 258 +++++++++++++++++++++++++++++----------- pairwiseseqscommand.h | 258 +++++++++++++++++++++++++++++++++++++++- 5 files changed, 454 insertions(+), 74 deletions(-) diff --git a/blastalign.hpp b/blastalign.hpp index 5b78da6..31688bd 100644 --- a/blastalign.hpp +++ b/blastalign.hpp @@ -1,3 +1,7 @@ +#ifndef BlastAlignment_H +#define BlastAlignment_H + + /* * blastalign.hpp * @@ -36,3 +40,7 @@ private: float gapExtend; }; +#endif + + + diff --git a/classifytreecommand.cpp b/classifytreecommand.cpp index e1ef6d3..9ec4e6f 100644 --- a/classifytreecommand.cpp +++ b/classifytreecommand.cpp @@ -33,7 +33,7 @@ vector ClassifyTreeCommand::setParameters(){ string ClassifyTreeCommand::getHelpString(){ try { string helpString = ""; - helpString += "The classify.tree command reads a tree and taxonomy file and output the concensus taxonomy for each node on the tree. \n"; + helpString += "The classify.tree command reads a tree and taxonomy file and output the consensus taxonomy for each node on the tree. \n"; helpString += "If you provide a group file, the concensus for each group will also be provided. \n"; helpString += "The new tree contains labels at each internal node. The label is the node number so you can relate the tree to the summary file.\n"; helpString += "The summary file lists the concensus taxonomy for the descendants of each node.\n"; diff --git a/classifytreecommand.h b/classifytreecommand.h index bd0e1ce..026e4ba 100644 --- a/classifytreecommand.h +++ b/classifytreecommand.h @@ -24,7 +24,7 @@ public: string getCommandCategory() { return "Phylotype Analysis"; } string getHelpString(); string getCitation() { return "http://www.mothur.org/wiki/Classify.tree"; } - string getDescription() { return "Find the concensus taxonomy for the descendant of each tree node"; } + string getDescription() { return "Find the consensus taxonomy for the descendant of each tree node"; } int execute(); void help() { m->mothurOut(getHelpString()); } diff --git a/pairwiseseqscommand.cpp b/pairwiseseqscommand.cpp index 114f545..ed11dfb 100644 --- a/pairwiseseqscommand.cpp +++ b/pairwiseseqscommand.cpp @@ -8,19 +8,6 @@ */ #include "pairwiseseqscommand.h" -#include "sequence.hpp" - -#include "gotohoverlap.hpp" -#include "needlemanoverlap.hpp" -#include "blastalign.hpp" -#include "noalign.hpp" - -#include "ignoregaps.h" -#include "eachgapdist.h" -#include "eachgapignore.h" -#include "onegapdist.h" -#include "onegapignore.h" - //********************************************************************************************************************** vector PairwiseSeqsCommand::setParameters(){ @@ -247,25 +234,6 @@ PairwiseSeqsCommand::PairwiseSeqsCommand(string option) { if (calc == "default") { calc = "onegap"; } } m->splitAtDash(calc, Estimators); - - ValidCalculators validCalculator; - if (countends) { - for (int i=0; imothurOut(align + " is not a valid alignment option. I will run the command using needleman."); - m->mothurOutEndLine(); - alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); - } + longestBase = 2000; //will need to update this in driver if we find sequences with more bases. hardcoded so we don't have the pre-read user fasta file. cutoff += 0.005; @@ -357,11 +315,11 @@ int PairwiseSeqsCommand::execute(){ driverMPI(start, end, outMPI, cutoff); - if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); m->mothurRemove(outputFile); delete distCalculator; return 0; } + if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); m->mothurRemove(outputFile); return 0; } //wait on chidren for(int i = 1; i < processors; i++) { - if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); m->mothurRemove(outputFile); delete distCalculator; return 0; } + if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); m->mothurRemove(outputFile); return 0; } char buf[5]; MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); @@ -370,7 +328,7 @@ int PairwiseSeqsCommand::execute(){ //do your part driverMPI(start, end, outMPI, cutoff); - if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); m->mothurRemove(outputFile); delete distCalculator; return 0; } + if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); m->mothurRemove(outputFile); return 0; } char buf[5]; strcpy(buf, "done"); @@ -390,7 +348,7 @@ int PairwiseSeqsCommand::execute(){ if (output != "square"){ driverMPI(start, end, outputFile, mySize); } else { driverMPI(start, end, outputFile, mySize, output); } - if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFile); delete distCalculator; return 0; } + if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFile); return 0; } int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; // MPI_File outMPI; @@ -405,7 +363,7 @@ int PairwiseSeqsCommand::execute(){ for(int b = 1; b < processors; b++) { unsigned long long fileSize; - if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); m->mothurRemove(outputFile); delete distCalculator; return 0; } + if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); m->mothurRemove(outputFile); return 0; } MPI_Recv(&fileSize, 1, MPI_LONG, b, tag, MPI_COMM_WORLD, &status); @@ -444,7 +402,7 @@ int PairwiseSeqsCommand::execute(){ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) //if you don't need to fork anything if(processors == 1){ if (output != "square") { driver(0, numSeqs, outputFile, cutoff); } @@ -465,14 +423,14 @@ int PairwiseSeqsCommand::execute(){ createProcesses(outputFile); } - #else + //#else //ifstream inFASTA; - if (output != "square") { driver(0, numSeqs, outputFile, cutoff); } - else { driver(0, numSeqs, outputFile, "square"); } - #endif + //if (output != "square") { driver(0, numSeqs, outputFile, cutoff); } + //else { driver(0, numSeqs, outputFile, "square"); } + //#endif #endif - if (m->control_pressed) { outputTypes.clear(); delete distCalculator; m->mothurRemove(outputFile); return 0; } + if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFile); return 0; } #ifdef USE_MPI MPI_Comm_rank(MPI_COMM_WORLD, &pid); @@ -500,10 +458,8 @@ int PairwiseSeqsCommand::execute(){ m->mothurOut("It took " + toString(time(NULL) - startTime) + " to calculate the distances for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); - if (m->control_pressed) { outputTypes.clear(); delete distCalculator; m->mothurRemove(outputFile); return 0; } + if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFile); return 0; } } - - delete distCalculator; //set phylip file as new current phylipfile string current = ""; @@ -535,9 +491,11 @@ int PairwiseSeqsCommand::execute(){ /**************************************************************************************************/ void PairwiseSeqsCommand::createProcesses(string filename) { try { -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - int process = 1; + int process = 1; processIDS.clear(); + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + //loop through and create all the processes you want while (process != processors) { @@ -567,13 +525,51 @@ void PairwiseSeqsCommand::createProcesses(string filename) { int temp = processIDS[i]; wait(&temp); } +#else + ////////////////////////////////////////////////////////////////////////////////////////////////////// + //Windows version shared memory, so be careful when passing variables through the distanceData struct. + //Above fork() will clone, so memory is separate, but that's not the case with windows, + //that's why the distance calculator was moved inside of the driver to make separate copies. + ////////////////////////////////////////////////////////////////////////////////////////////////////// + + vector pDataArray; //[processors-1]; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; + + //Create processor-1 worker threads. + for( int i=0; iappendFiles((filename + toString(processIDS[i]) + ".temp"), filename); m->mothurRemove((filename + toString(processIDS[i]) + ".temp")); } -#endif + } catch(exception& e) { m->errorOut(e, "PairwiseSeqsCommand", "createProcesses"); @@ -587,6 +583,33 @@ int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, fl try { int startTime = time(NULL); + + Alignment* alignment; + if(align == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase); } + else if(align == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); } + else if(align == "blast") { alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch); } + else if(align == "noalign") { alignment = new NoAlign(); } + else { + m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman."); + m->mothurOutEndLine(); + alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); + } + + ValidCalculators validCalculator; + Dist* distCalculator; + if (countends) { + if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { + if (Estimators[0] == "nogaps") { distCalculator = new ignoreGaps(); } + else if (Estimators[0] == "eachgap") { distCalculator = new eachGapDist(); } + else if (Estimators[0] == "onegap") { distCalculator = new oneGapDist(); } + } + }else { + if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { + if (Estimators[0] == "nogaps") { distCalculator = new ignoreGaps(); } + else if (Estimators[0] == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(); } + else if (Estimators[0] == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(); } + } + } //column file ofstream outFile(dFileName.c_str(), ios::trunc); @@ -606,7 +629,7 @@ int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, fl for(int j=0;jcontrol_pressed) { outFile.close(); return 0; } + if (m->control_pressed) { outFile.close(); delete alignment; delete distCalculator; return 0; } if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) { alignment->resize(alignDB.get(i).getUnaligned().length()+1); @@ -643,6 +666,8 @@ int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, fl m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); outFile.close(); + delete alignment; + delete distCalculator; return 1; } @@ -657,7 +682,34 @@ int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, st try { int startTime = time(NULL); + + Alignment* alignment; + if(align == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase); } + else if(align == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); } + else if(align == "blast") { alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch); } + else if(align == "noalign") { alignment = new NoAlign(); } + else { + m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman."); + m->mothurOutEndLine(); + alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); + } + ValidCalculators validCalculator; + Dist* distCalculator; + if (countends) { + if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { + if (Estimators[0] == "nogaps") { distCalculator = new ignoreGaps(); } + else if (Estimators[0] == "eachgap") { distCalculator = new eachGapDist(); } + else if (Estimators[0] == "onegap") { distCalculator = new oneGapDist(); } + } + }else { + if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { + if (Estimators[0] == "nogaps") { distCalculator = new ignoreGaps(); } + else if (Estimators[0] == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(); } + else if (Estimators[0] == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(); } + } + } + //column file ofstream outFile(dFileName.c_str(), ios::trunc); outFile.setf(ios::fixed, ios::showpoint); @@ -675,7 +727,7 @@ int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, st for(int j=0;jcontrol_pressed) { outFile.close(); return 0; } + if (m->control_pressed) { outFile.close(); delete alignment; delete distCalculator; return 0; } if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) { alignment->resize(alignDB.get(i).getUnaligned().length()+1); @@ -708,6 +760,8 @@ int PairwiseSeqsCommand::driver(int startLine, int endLine, string dFileName, st m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); outFile.close(); + delete alignment; + delete distCalculator; return 1; } @@ -723,14 +777,41 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, try { MPI_Status status; int startTime = time(NULL); + + Alignment* alignment; + if(align == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase); } + else if(align == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); } + else if(align == "blast") { alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch); } + else if(align == "noalign") { alignment = new NoAlign(); } + else { + m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman."); + m->mothurOutEndLine(); + alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); + } + ValidCalculators validCalculator; + Dist* distCalculator; + if (countends) { + if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { + if (Estimators[0] == "nogaps") { distCalculator = new ignoreGaps(); } + else if (Estimators[0] == "eachgap") { distCalculator = new eachGapDist(); } + else if (Estimators[0] == "onegap") { distCalculator = new oneGapDist(); } + } + }else { + if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { + if (Estimators[0] == "nogaps") { distCalculator = new ignoreGaps(); } + else if (Estimators[0] == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(); } + else if (Estimators[0] == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(); } + } + } + string outputString = ""; for(int i=startLine;icontrol_pressed) { return 0; } + if (m->control_pressed) { delete alignment; delete distCalculator; return 0; } if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) { alignment->resize(alignDB.get(i).getUnaligned().length()+1); @@ -772,7 +853,8 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, delete buf; } - + delete alignment; + delete distCalculator; return 1; } catch(exception& e) { @@ -794,7 +876,33 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsi MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI); - + Alignment* alignment; + if(align == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase); } + else if(align == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); } + else if(align == "blast") { alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch); } + else if(align == "noalign") { alignment = new NoAlign(); } + else { + m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman."); + m->mothurOutEndLine(); + alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); + } + + ValidCalculators validCalculator; + Dist* distCalculator; + if (countends) { + if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { + if (Estimators[0] == "nogaps") { distCalculator = new ignoreGaps(); } + else if (Estimators[0] == "eachgap") { distCalculator = new eachGapDist(); } + else if (Estimators[0] == "onegap") { distCalculator = new oneGapDist(); } + } + }else { + if (validCalculator.isValidCalculator("distance", Estimators[0]) == true) { + if (Estimators[0] == "nogaps") { distCalculator = new ignoreGaps(); } + else if (Estimators[0] == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(); } + else if (Estimators[0] == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(); } + } + } + string outputString = ""; size = 0; @@ -811,7 +919,7 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsi for(int j=0;jcontrol_pressed) { return 0; } + if (m->control_pressed) { delete alignment; delete distCalculator; return 0; } if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) { alignment->resize(alignDB.get(i).getUnaligned().length()+1); @@ -848,6 +956,8 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsi } MPI_File_close(&outMPI); + delete alignment; + delete distCalculator; return 1; } @@ -870,7 +980,16 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsi MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI); - + Alignment* alignment; + if(align == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase); } + else if(align == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); } + else if(align == "blast") { alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch); } + else if(align == "noalign") { alignment = new NoAlign(); } + else { + m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman."); + m->mothurOutEndLine(); + alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); + } string outputString = ""; size = 0; @@ -887,7 +1006,7 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsi for(int j=0;jcontrol_pressed) { return 0; } + if (m->control_pressed) { delete alignment; return 0; } if (alignDB.get(i).getUnaligned().length() > alignment->getnRows()) { alignment->resize(alignDB.get(i).getUnaligned().length()+1); @@ -925,6 +1044,7 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsi MPI_File_close(&outMPI); + delete alignment; return 1; } catch(exception& e) { diff --git a/pairwiseseqscommand.h b/pairwiseseqscommand.h index a3a91f7..ba82f47 100644 --- a/pairwiseseqscommand.h +++ b/pairwiseseqscommand.h @@ -17,6 +17,18 @@ #include "validcalculator.h" #include "dist.h" #include "sequencedb.h" +#include "sequence.hpp" + +#include "gotohoverlap.hpp" +#include "needlemanoverlap.hpp" +#include "blastalign.hpp" +#include "noalign.hpp" + +#include "ignoregaps.h" +#include "eachgapdist.h" +#include "eachgapignore.h" +#include "onegapdist.h" +#include "onegapignore.h" class PairwiseSeqsCommand : public Command { @@ -44,8 +56,6 @@ private: vector processIDS; //end line, processid vector lines; - Alignment* alignment; - Dist* distCalculator; SequenceDB alignDB; void createProcesses(string); @@ -60,12 +70,254 @@ private: string fastaFileName, align, calc, outputDir, output; float match, misMatch, gapOpen, gapExtend, cutoff; - int processors; + int processors, longestBase; vector fastaFileNames, Estimators; vector outputNames; bool abort, countends, compress; }; +/**************************************************************************************************/ +//custom data structure for threads to use. +// This is passed by void pointer so it can be any data type +// that can be passed using a single void pointer (LPVOID). +struct pairwiseData { + string outputFileName; + string align, square, distcalcType, output; + unsigned long long start; + unsigned long long end; + MothurOut* m; + float match, misMatch, gapOpen, gapExtend, cutoff; + int count, threadID, longestBase; + bool countends; + SequenceDB alignDB; + + pairwiseData(){} + pairwiseData(string ofn, string al, string sq, string di, bool co, string op, SequenceDB DB, MothurOut* mout, unsigned long long st, unsigned long long en, float ma, float misMa, float gapO, float gapE, int thr, int tid) { + outputFileName = ofn; + m = mout; + start = st; + end = en; + match = ma; + misMatch = misMa; + gapOpen = gapO; + gapExtend = gapE; + longestBase = thr; + align = al; + square = sq; + distcalcType = di; + countends = co; + alignDB = DB; + count = 0; + output = op; + threadID = tid; + } +}; + +/**************************************************************************************************/ +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#else +static DWORD WINAPI MyPairwiseSquareThreadFunction(LPVOID lpParam){ + pairwiseData* pDataArray; + pDataArray = (pairwiseData*)lpParam; + + try { + ofstream outFile((pDataArray->outputFileName).c_str(), ios::trunc); + outFile.setf(ios::fixed, ios::showpoint); + outFile << setprecision(4); + + pDataArray->count = pDataArray->end; + + int startTime = time(NULL); + + Alignment* alignment; + if(pDataArray->align == "gotoh") { alignment = new GotohOverlap(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase); } + else if(pDataArray->align == "needleman") { alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase); } + else if(pDataArray->align == "blast") { alignment = new BlastAlignment(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch); } + else if(pDataArray->align == "noalign") { alignment = new NoAlign(); } + else { + pDataArray->m->mothurOut(pDataArray->align + " is not a valid alignment option. I will run the command using needleman."); + pDataArray->m->mothurOutEndLine(); + alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase); + } + + ValidCalculators validCalculator; + Dist* distCalculator; + if (pDataArray->countends) { + if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) { + if (pDataArray->distcalcType == "nogaps") { distCalculator = new ignoreGaps(); } + else if (pDataArray->distcalcType == "eachgap") { distCalculator = new eachGapDist(); } + else if (pDataArray->distcalcType == "onegap") { distCalculator = new oneGapDist(); } + } + }else { + if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) { + if (pDataArray->distcalcType == "nogaps") { distCalculator = new ignoreGaps(); } + else if (pDataArray->distcalcType == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(); } + else if (pDataArray->distcalcType == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(); } + } + } + + if(pDataArray->start == 0){ outFile << pDataArray->alignDB.getNumSeqs() << endl; } + + for(int i=pDataArray->start;iend;i++){ + + string name = pDataArray->alignDB.get(i).getName(); + //pad with spaces to make compatible + if (name.length() < 10) { while (name.length() < 10) { name += " "; } } + + outFile << name << '\t'; + + for(int j=0;jalignDB.getNumSeqs();j++){ + + if (pDataArray->m->control_pressed) { outFile.close(); delete alignment; delete distCalculator; return 0; } + + if (pDataArray->alignDB.get(i).getUnaligned().length() > alignment->getnRows()) { + alignment->resize(pDataArray->alignDB.get(i).getUnaligned().length()+1); + } + + if (pDataArray->alignDB.get(j).getUnaligned().length() > alignment->getnRows()) { + alignment->resize(pDataArray->alignDB.get(j).getUnaligned().length()+1); + } + + Sequence seqI(pDataArray->alignDB.get(i).getName(), pDataArray->alignDB.get(i).getAligned()); + Sequence seqJ(pDataArray->alignDB.get(j).getName(), pDataArray->alignDB.get(j).getAligned()); + + alignment->align(seqI.getUnaligned(), seqJ.getUnaligned()); + seqI.setAligned(alignment->getSeqAAln()); + seqJ.setAligned(alignment->getSeqBAln()); + + distCalculator->calcDist(seqI, seqJ); + double dist = distCalculator->getDist(); + + outFile << dist << '\t'; + } + + outFile << endl; + + if(i % 100 == 0){ + pDataArray->m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine(); + } + + } + pDataArray->m->mothurOut(toString(pDataArray->end-1) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine(); + + outFile.close(); + delete alignment; + delete distCalculator; + + + } + catch(exception& e) { + pDataArray->m->errorOut(e, "PairwiseSeqsCommand", "MyPairwiseSquareThreadFunction"); + exit(1); + } +} + +/**************************************************************************************************/ +static DWORD WINAPI MyPairwiseThreadFunction(LPVOID lpParam){ + pairwiseData* pDataArray; + pDataArray = (pairwiseData*)lpParam; + + try { + ofstream outFile((pDataArray->outputFileName).c_str(), ios::trunc); + outFile.setf(ios::fixed, ios::showpoint); + outFile << setprecision(4); + + pDataArray->count = pDataArray->end; + + int startTime = time(NULL); + + Alignment* alignment; + if(pDataArray->align == "gotoh") { alignment = new GotohOverlap(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase); } + else if(pDataArray->align == "needleman") { alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase); } + else if(pDataArray->align == "blast") { alignment = new BlastAlignment(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch); } + else if(pDataArray->align == "noalign") { alignment = new NoAlign(); } + else { + pDataArray->m->mothurOut(pDataArray->align + " is not a valid alignment option. I will run the command using needleman."); + pDataArray->m->mothurOutEndLine(); + alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, pDataArray->longestBase); + } + + ValidCalculators validCalculator; + Dist* distCalculator; + if (pDataArray->countends) { + if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) { + if (pDataArray->distcalcType == "nogaps") { distCalculator = new ignoreGaps(); } + else if (pDataArray->distcalcType == "eachgap") { distCalculator = new eachGapDist(); } + else if (pDataArray->distcalcType == "onegap") { distCalculator = new oneGapDist(); } + } + }else { + if (validCalculator.isValidCalculator("distance", pDataArray->distcalcType) == true) { + if (pDataArray->distcalcType == "nogaps") { distCalculator = new ignoreGaps(); } + else if (pDataArray->distcalcType == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(); } + else if (pDataArray->distcalcType == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(); } + } + } + + if((pDataArray->output == "lt") && pDataArray->start == 0){ outFile << pDataArray->alignDB.getNumSeqs() << endl; } + + for(int i=pDataArray->start;iend;i++){ + + if(pDataArray->output == "lt") { + string name = pDataArray->alignDB.get(i).getName(); + if (name.length() < 10) { //pad with spaces to make compatible + while (name.length() < 10) { name += " "; } + } + outFile << name << '\t'; + } + + + for(int j=0;jm->control_pressed) { outFile.close(); delete alignment; delete distCalculator; return 0; } + + if (pDataArray->alignDB.get(i).getUnaligned().length() > alignment->getnRows()) { + alignment->resize(pDataArray->alignDB.get(i).getUnaligned().length()+1); + } + + if (pDataArray->alignDB.get(j).getUnaligned().length() > alignment->getnRows()) { + alignment->resize(pDataArray->alignDB.get(j).getUnaligned().length()+1); + } + + Sequence seqI(pDataArray->alignDB.get(i).getName(), pDataArray->alignDB.get(i).getAligned()); + Sequence seqJ(pDataArray->alignDB.get(j).getName(), pDataArray->alignDB.get(j).getAligned()); + + alignment->align(seqI.getUnaligned(), seqJ.getUnaligned()); + seqI.setAligned(alignment->getSeqAAln()); + seqJ.setAligned(alignment->getSeqBAln()); + + distCalculator->calcDist(seqI, seqJ); + double dist = distCalculator->getDist(); + + if(dist <= pDataArray->cutoff){ + if (pDataArray->output == "column") { outFile << pDataArray->alignDB.get(i).getName() << ' ' << pDataArray->alignDB.get(j).getName() << ' ' << dist << endl; } + } + if (pDataArray->output == "lt") { outFile << dist << '\t'; } + } + + if (pDataArray->output == "lt") { outFile << endl; } + + if(i % 100 == 0){ + pDataArray->m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine(); + } + + } + pDataArray->m->mothurOut(toString(pDataArray->end-1) + "\t" + toString(time(NULL) - startTime)); pDataArray->m->mothurOutEndLine(); + + outFile.close(); + delete alignment; + delete distCalculator; + + + } + catch(exception& e) { + pDataArray->m->errorOut(e, "PairwiseSeqsCommand", "MyPairwiseThreadFunction"); + exit(1); + } +} + +#endif + + #endif -- 2.39.2