X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=distancecommand.h;h=2f53232b5943d835e8c27adf7374a5e3a8ce168f;hp=36a1e24b0986bb24a106fb3b72f6cba17f1acfce;hb=df7e3ff9f68ef157b0328a2d353c3258c5d45d89;hpb=83b5acbe3d6087a6cd73e80dde4b923387a01d01 diff --git a/distancecommand.h b/distancecommand.h index 36a1e24..2f53232 100644 --- a/distancecommand.h +++ b/distancecommand.h @@ -15,50 +15,218 @@ #include "validcalculator.h" #include "dist.h" #include "sequencedb.h" +#include "ignoregaps.h" +#include "eachgapdist.h" +#include "eachgapignore.h" +#include "onegapdist.h" +#include "onegapignore.h" +//custom data structure for threads to use. +// This is passed by void pointer so it can be any data type +// that can be passed using a single void pointer (LPVOID). +struct distanceData { + int startLine; + int endLine; + string dFileName; + float cutoff; + SequenceDB alignDB; + vector Estimators; + MothurOut* m; + string output; + int numNewFasta, count; + string countends; + + distanceData(){} + distanceData(int s, int e, string dbname, float c, SequenceDB db, vector Est, MothurOut* mout, string o, int num, string count) { + startLine = s; + endLine = e; + dFileName = dbname; + cutoff = c; + alignDB = db; + Estimators = Est; + m = mout; + output = o; + numNewFasta = num; + countends = count; + + } +}; + +/**************************************************************************************************/ +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) +#else +static DWORD WINAPI MyDistThreadFunction(LPVOID lpParam){ + distanceData* pDataArray; + pDataArray = (distanceData*)lpParam; + + try { + ValidCalculators validCalculator; + Dist* distCalculator; + if (pDataArray->m->isTrue(pDataArray->countends) == true) { + for (int i=0; iEstimators.size(); i++) { + if (validCalculator.isValidCalculator("distance", pDataArray->Estimators[i]) == true) { + if (pDataArray->Estimators[i] == "nogaps") { distCalculator = new ignoreGaps(); } + else if (pDataArray->Estimators[i] == "eachgap") { distCalculator = new eachGapDist(); } + else if (pDataArray->Estimators[i] == "onegap") { distCalculator = new oneGapDist(); } + } + } + }else { + for (int i=0; iEstimators.size(); i++) { + if (validCalculator.isValidCalculator("distance", pDataArray->Estimators[i]) == true) { + if (pDataArray->Estimators[i] == "nogaps") { distCalculator = new ignoreGaps(); } + else if (pDataArray->Estimators[i] == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(); } + else if (pDataArray->Estimators[i] == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(); } + } + } + } + + int startTime = time(NULL); + + //column file + ofstream outFile(pDataArray->dFileName.c_str(), ios::trunc); + outFile.setf(ios::fixed, ios::showpoint); + outFile << setprecision(4); + pDataArray->count = 0; + + if (pDataArray->output != "square") { + if((pDataArray->output == "lt") && (pDataArray->startLine == 0)){ outFile << pDataArray->alignDB.getNumSeqs() << endl; } + + for(int i=pDataArray->startLine;iendLine;i++){ + if(pDataArray->output == "lt") { + string name = pDataArray->alignDB.get(i).getName(); + if (name.length() < 10) { //pad with spaces to make compatible + while (name.length() < 10) { name += " "; } + } + outFile << name << '\t'; + } + for(int j=0;jm->control_pressed) { delete distCalculator; outFile.close(); return 0; } + + //if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file + //the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop + if ((i >= pDataArray->numNewFasta) && (j >= pDataArray->numNewFasta)) { break; } + + distCalculator->calcDist(pDataArray->alignDB.get(i), pDataArray->alignDB.get(j)); + double dist = distCalculator->getDist(); + + if(dist <= pDataArray->cutoff){ + if (pDataArray->output == "column") { outFile << pDataArray->alignDB.get(i).getName() << ' ' << pDataArray->alignDB.get(j).getName() << ' ' << dist << endl; } + } + if (pDataArray->output == "lt") { outFile << dist << '\t'; } + } + + if (pDataArray->output == "lt") { outFile << endl; } + + if(i % 100 == 0){ + pDataArray->m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n"); } + pDataArray->count++; + } + pDataArray->m->mothurOutJustToScreen(toString(pDataArray->count) + "\t" + toString(time(NULL) - startTime)+"\n"); + }else{ + if(pDataArray->startLine == 0){ outFile << pDataArray->alignDB.getNumSeqs() << endl; } + + for(int i=pDataArray->startLine;iendLine;i++){ + + string name = pDataArray->alignDB.get(i).getName(); + //pad with spaces to make compatible + if (name.length() < 10) { while (name.length() < 10) { name += " "; } } + + outFile << name << '\t'; + + for(int j=0;jalignDB.getNumSeqs();j++){ + + if (pDataArray->m->control_pressed) { delete distCalculator; outFile.close(); return 0; } + + distCalculator->calcDist(pDataArray->alignDB.get(i), pDataArray->alignDB.get(j)); + double dist = distCalculator->getDist(); + + outFile << dist << '\t'; + } + + outFile << endl; + + if(i % 100 == 0){ + pDataArray->m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n"); + } + pDataArray->count++; + } + pDataArray->m->mothurOutJustToScreen(toString(pDataArray->count) + "\t" + toString(time(NULL) - startTime)+"\n"); + } + + outFile.close(); + delete distCalculator; + + return 0; + } + catch(exception& e) { + pDataArray->m->errorOut(e, "DistanceCommand", "MyDistThreadFunction"); + exit(1); + } +} +#endif +/**************************************************************************************************/ class DistanceCommand : public Command { public: DistanceCommand(string); - ~DistanceCommand(); - int execute(); - void help(); + DistanceCommand(); + ~DistanceCommand() {} + + vector setParameters(); + string getCommandName() { return "dist.seqs"; } + string getCommandCategory() { return "Sequence Processing"; } + + string getHelpString(); + string getOutputPattern(string); + string getCitation() { return "Schloss PD (2010). The effects of alignment quality, distance calculation method, sequence filtering, and region on the analysis of 16S rRNA gene-based studies. PLoS Comput Biol 6: e1000844. \nhttp://www.mothur.org/wiki/Dist.seqs"; } + string getDescription() { return "calculate the pairwaise distances between aligned sequences"; } + + int execute(); + void help() { m->mothurOut(getHelpString()); } + private: - struct linePair { + struct distlinePair { int start; int end; + }; - Dist* distCalculator; + //Dist* distCalculator; SequenceDB alignDB; - string countends, output, fastafile, calc, outputDir, oldfastafile, column; + string countends, output, fastafile, calc, outputDir, oldfastafile, column, compress; + int processors, numNewFasta; float cutoff; - map processIDS; //end line, processid - vector lines; + vector processIDS; //end line, processid + vector lines; bool abort; - vector Estimators; //holds estimators to be used + vector Estimators, outputNames; //holds estimators to be used - //void appendFiles(string, string); + //void m->appendFiles(string, string); void createProcesses(string); int driver(/*Dist*, SequenceDB, */int, int, string, float); + int driver(int, int, string, string); #ifdef USE_MPI int driverMPI(int, int, MPI_File&, float); - int driverMPI(int, int, string, long&); + int driverMPI(int, int, string, unsigned long long&); + int driverMPI(int, int, string, unsigned long long&, string); #endif - int convertMatrix(string); + //int convertMatrix(string); bool sanityCheck(); - int convertToLowerTriangle(string); + //int convertToLowerTriangle(string); }; #endif +/**************************************************************************************************/ +