X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=distancecommand.cpp;h=16407d59b9819183edb70748d0b3a0ca1ff9acda;hb=28bcfc4a41b8b82f66636587e0d4d355d07cbdd1;hp=b73cd7c751176c386a95931b0887c6790f046583;hpb=f06fdb807822f8e06db003ed809c87250905cfc8;p=mothur.git diff --git a/distancecommand.cpp b/distancecommand.cpp index b73cd7c..16407d5 100644 --- a/distancecommand.cpp +++ b/distancecommand.cpp @@ -8,11 +8,6 @@ */ #include "distancecommand.h" -#include "ignoregaps.h" -#include "eachgapdist.h" -#include "eachgapignore.h" -#include "onegapdist.h" -#include "onegapignore.h" //********************************************************************************************************************** vector DistanceCommand::setParameters(){ @@ -20,7 +15,7 @@ vector DistanceCommand::setParameters(){ CommandParameter pcolumn("column", "InputTypes", "", "", "none", "none", "OldFastaColumn",false,false); parameters.push_back(pcolumn); CommandParameter poldfasta("oldfasta", "InputTypes", "", "", "none", "none", "OldFastaColumn",false,false); parameters.push_back(poldfasta); CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta); - CommandParameter poutput("output", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(poutput); + CommandParameter poutput("output", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(poutput); CommandParameter pcalc("calc", "Multiple", "nogaps-eachgap-onegap", "onegap", "", "", "",false,false); parameters.push_back(pcalc); CommandParameter pcountends("countends", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pcountends); CommandParameter pcompress("compress", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcompress); @@ -64,6 +59,27 @@ string DistanceCommand::getHelpString(){ } } //********************************************************************************************************************** +string DistanceCommand::getOutputFileNameTag(string type, string inputName=""){ + try { + string outputFileName = ""; + map >::iterator it; + + //is this a type this command creates + it = outputTypes.find(type); + if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } + else { + if (type == "phylip") { outputFileName = "dist"; } + else if (type == "column") { outputFileName = "dist"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } + } + return outputFileName; + } + catch(exception& e) { + m->errorOut(e, "DistanceCommand", "getOutputFileNameTag"); + exit(1); + } +} +//********************************************************************************************************************** DistanceCommand::DistanceCommand(){ try { abort = true; calledHelp = true; @@ -152,6 +168,7 @@ DistanceCommand::DistanceCommand(string option) { m->openInputFile(fastafile, inFASTA); alignDB = SequenceDB(inFASTA); inFASTA.close(); + m->setFastaFile(fastafile); } oldfastafile = validParameter.validFile(parameters, "oldfasta", true); @@ -161,6 +178,7 @@ DistanceCommand::DistanceCommand(string option) { column = validParameter.validFile(parameters, "column", true); if (column == "not found") { column = ""; } else if (column == "not open") { abort = true; } + else { m->setColumnFile(column); } //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ @@ -182,42 +200,23 @@ DistanceCommand::DistanceCommand(string option) { convert(temp, countends); temp = validParameter.validFile(parameters, "cutoff", false); if(temp == "not found"){ temp = "1.0"; } - convert(temp, cutoff); + m->mothurConvert(temp, cutoff); temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } m->setProcessors(temp); - convert(temp, processors); + m->mothurConvert(temp, processors); temp = validParameter.validFile(parameters, "compress", false); if(temp == "not found"){ temp = "F"; } convert(temp, compress); output = validParameter.validFile(parameters, "output", false); if(output == "not found"){ output = "column"; } + if (output == "phylip") { output = "lt"; } if (((column != "") && (oldfastafile == "")) || ((column == "") && (oldfastafile != ""))) { m->mothurOut("If you provide column or oldfasta, you must provide both."); m->mothurOutEndLine(); abort=true; } if ((column != "") && (oldfastafile != "") && (output != "column")) { m->mothurOut("You have provided column and oldfasta, indicating you want to append distances to your column file. Your output must be in column format to do so."); m->mothurOutEndLine(); abort=true; } if ((output != "column") && (output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column."); m->mothurOutEndLine(); output = "column"; } - - ValidCalculators validCalculator; - - if (m->isTrue(countends) == true) { - for (int i=0; igetRootName(m->getSimpleName(fastafile)) + "phylip.dist"; - remove(outputFile.c_str()); outputTypes["phylip"].push_back(outputFile); + outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "phylip." + getOutputFileNameTag("phylip"); + m->mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile); //output numSeqs to phylip formatted dist file }else if (output == "column") { //user wants column format - outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist"; + outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("column"); outputTypes["column"].push_back(outputFile); //so we don't accidentally overwrite @@ -266,10 +265,10 @@ int DistanceCommand::execute(){ rename(column.c_str(), tempcolumn.c_str()); } - remove(outputFile.c_str()); + m->mothurRemove(outputFile); }else { //assume square - outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "square.dist"; - remove(outputFile.c_str()); + outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "square." + getOutputFileNameTag("phylip"); + m->mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile); } @@ -312,11 +311,11 @@ int DistanceCommand::execute(){ driverMPI(start, end, outMPI, cutoff); - if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); delete distCalculator; return 0; } + if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); return 0; } //wait on chidren for(int i = 1; i < processors; i++) { - if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); delete distCalculator; return 0; } + if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); return 0; } char buf[5]; MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); @@ -325,7 +324,7 @@ int DistanceCommand::execute(){ //do your part driverMPI(start, end, outMPI, cutoff); - if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); delete distCalculator; return 0; } + if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); return 0; } char buf[5]; strcpy(buf, "done"); @@ -340,12 +339,12 @@ int DistanceCommand::execute(){ //do your part string outputMyPart; - unsigned long int mySize; + unsigned long long mySize; if (output != "square"){ driverMPI(start, end, outputFile, mySize); } else { driverMPI(start, end, outputFile, mySize, output); } - if (m->control_pressed) { outputTypes.clear(); delete distCalculator; return 0; } + if (m->control_pressed) { outputTypes.clear(); return 0; } int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; // MPI_File outMPI; @@ -362,9 +361,9 @@ int DistanceCommand::execute(){ //wait on chidren for(int b = 1; b < processors; b++) { - unsigned long int fileSize; + unsigned long long fileSize; - if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); delete distCalculator; return 0; } + if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); return 0; } MPI_Recv(&fileSize, 1, MPI_LONG, b, tag, MPI_COMM_WORLD, &status); @@ -390,11 +389,11 @@ int DistanceCommand::execute(){ MPI_File_close(&outMPI); }else { //you are a child process //do your part - unsigned long int size; + unsigned long long size; if (output != "square"){ driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size); } else { driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size, output); } - if (m->control_pressed) { delete distCalculator; return 0; } + if (m->control_pressed) { return 0; } //tell parent you are done. MPI_Send(&size, 1, MPI_LONG, 0, tag, MPI_COMM_WORLD); @@ -403,14 +402,14 @@ int DistanceCommand::execute(){ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //if you don't need to fork anything if(processors == 1){ if (output != "square") { driver(0, numSeqs, outputFile, cutoff); } else { driver(0, numSeqs, outputFile, "square"); } }else{ //you have multiple processors - unsigned long int numDists = 0; + unsigned long long numDists = 0; if (output == "square") { numDists = numSeqs * numSeqs; @@ -440,14 +439,14 @@ int DistanceCommand::execute(){ createProcesses(outputFile); } - #else + //#else //ifstream inFASTA; - if (output != "square") { driver(0, numSeqs, outputFile, cutoff); } - else { driver(0, numSeqs, outputFile, "square"); } - #endif + //if (output != "square") { driver(0, numSeqs, outputFile, cutoff); } + //else { driver(0, numSeqs, outputFile, "square"); } + //#endif #endif - if (m->control_pressed) { outputTypes.clear(); delete distCalculator; remove(outputFile.c_str()); return 0; } + if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFile); return 0; } #ifdef USE_MPI MPI_Comm_rank(MPI_COMM_WORLD, &pid); @@ -470,17 +469,17 @@ int DistanceCommand::execute(){ if (outputFile == column) { string tempcolumn = column + ".old"; m->appendFiles(tempcolumn, outputFile); - remove(tempcolumn.c_str()); + m->mothurRemove(tempcolumn); }else{ m->appendFiles(outputFile, column); - remove(outputFile.c_str()); + m->mothurRemove(outputFile); outputFile = column; } if (outputDir != "") { string newOutputName = outputDir + m->getSimpleName(outputFile); rename(outputFile.c_str(), newOutputName.c_str()); - remove(outputFile.c_str()); + m->mothurRemove(outputFile); outputFile = newOutputName; } } @@ -490,9 +489,7 @@ int DistanceCommand::execute(){ } #endif - if (m->control_pressed) { outputTypes.clear(); delete distCalculator; remove(outputFile.c_str()); return 0; } - - delete distCalculator; + if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFile); return 0; } //set phylip file as new current phylipfile string current = ""; @@ -532,7 +529,7 @@ int DistanceCommand::execute(){ /**************************************************************************************************/ void DistanceCommand::createProcesses(string filename) { try { -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) int process = 1; processIDS.clear(); @@ -565,25 +562,80 @@ void DistanceCommand::createProcesses(string filename) { int temp = processIDS[i]; wait(&temp); } +#else + ////////////////////////////////////////////////////////////////////////////////////////////////////// + //Windows version shared memory, so be careful when passing variables through the distanceData struct. + //Above fork() will clone, so memory is separate, but that's not the case with windows, + //that's why the distance calculator was moved inside of the driver to make separate copies. + ////////////////////////////////////////////////////////////////////////////////////////////////////// + + vector pDataArray; //[processors-1]; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; + + //Create processor-1 worker threads. + for( int i=0; iappendFiles((filename + toString(processIDS[i]) + ".temp"), filename); - remove((filename + toString(processIDS[i]) + ".temp").c_str()); + m->mothurRemove((filename + toString(processIDS[i]) + ".temp")); } -#endif + } catch(exception& e) { m->errorOut(e, "DistanceCommand", "createProcesses"); exit(1); } } - /**************************************************************************************************/ /////// need to fix to work with calcs and sequencedb int DistanceCommand::driver(int startLine, int endLine, string dFileName, float cutoff){ try { - + ValidCalculators validCalculator; + Dist* distCalculator; + if (m->isTrue(countends) == true) { + for (int i=0; icontrol_pressed) { outFile.close(); return 0; } + if (m->control_pressed) { delete distCalculator; outFile.close(); return 0; } //if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file //the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop @@ -628,6 +680,7 @@ int DistanceCommand::driver(int startLine, int endLine, string dFileName, float m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); outFile.close(); + delete distCalculator; return 1; } @@ -640,7 +693,26 @@ int DistanceCommand::driver(int startLine, int endLine, string dFileName, float /////// need to fix to work with calcs and sequencedb int DistanceCommand::driver(int startLine, int endLine, string dFileName, string square){ try { - + ValidCalculators validCalculator; + Dist* distCalculator; + if (m->isTrue(countends) == true) { + for (int i=0; icontrol_pressed) { outFile.close(); return 0; } + if (m->control_pressed) { delete distCalculator; outFile.close(); return 0; } distCalculator->calcDist(alignDB.get(i), alignDB.get(j)); double dist = distCalculator->getDist(); @@ -678,6 +750,7 @@ int DistanceCommand::driver(int startLine, int endLine, string dFileName, string m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); outFile.close(); + delete distCalculator; return 1; } @@ -691,6 +764,28 @@ int DistanceCommand::driver(int startLine, int endLine, string dFileName, string /////// need to fix to work with calcs and sequencedb int DistanceCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, float cutoff){ try { + + ValidCalculators validCalculator; + Dist* distCalculator; + if (m->isTrue(countends) == true) { + for (int i=0; icontrol_pressed) { return 0; } + if (m->control_pressed) { delete distCalculator; return 0; } //if there was a column file given and we are appending, we don't want to calculate the distances that are already in the column file //the alignDB contains the new sequences and then the old, so if i an oldsequence and j is an old sequence then break out of this loop @@ -733,7 +828,8 @@ int DistanceCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, flo } //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); - cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl; + cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl; + delete distCalculator; return 1; } catch(exception& e) { @@ -743,8 +839,29 @@ int DistanceCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, flo } /**************************************************************************************************/ /////// need to fix to work with calcs and sequencedb -int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size){ +int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned long long& size){ try { + ValidCalculators validCalculator; + Dist* distCalculator; + if (m->isTrue(countends) == true) { + for (int i=0; icontrol_pressed) { return 0; } + if (m->control_pressed) { delete distCalculator; return 0; } distCalculator->calcDist(alignDB.get(i), alignDB.get(j)); double dist = distCalculator->getDist(); @@ -807,6 +924,7 @@ int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl; MPI_File_close(&outMPI); + delete distCalculator; return 1; } @@ -817,8 +935,28 @@ int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned } /**************************************************************************************************/ /////// need to fix to work with calcs and sequencedb -int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned long int& size, string square){ +int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned long long& size, string square){ try { + ValidCalculators validCalculator; + Dist* distCalculator; + if (m->isTrue(countends) == true) { + for (int i=0; icontrol_pressed) { return 0; } + if (m->control_pressed) { delete distCalculator; return 0; } distCalculator->calcDist(alignDB.get(i), alignDB.get(j)); double dist = distCalculator->getDist(); @@ -881,7 +1019,7 @@ int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl; MPI_File_close(&outMPI); - + delete distCalculator; return 1; } catch(exception& e) { @@ -898,7 +1036,7 @@ int DistanceCommand::convertMatrix(string outputFile) { string outfile = m->getRootName(outputFile) + "sorted.dist.temp"; //use the unix sort - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) string command = "sort -n " + outputFile + " -o " + outfile; system(command.c_str()); #else //sort using windows sort @@ -933,7 +1071,7 @@ int DistanceCommand::convertMatrix(string outputFile) { //m->openInputFile(outfile, in); while(!in.eof()) { - if (m->control_pressed) { in.close(); remove(outfile.c_str()); out.close(); return 0; } + if (m->control_pressed) { in.close(); m->mothurRemove(outfile); out.close(); return 0; } in >> first >> second >> dist; m->gobble(in); @@ -968,7 +1106,7 @@ int DistanceCommand::convertMatrix(string outputFile) { in.close(); out.close(); - remove(outfile.c_str()); + m->mothurRemove(outfile); return 1; @@ -978,7 +1116,7 @@ int DistanceCommand::convertMatrix(string outputFile) { exit(1); } } -/************************************************************************************************** +************************************************************************************************** int DistanceCommand::convertToLowerTriangle(string outputFile) { try{ @@ -986,7 +1124,7 @@ int DistanceCommand::convertToLowerTriangle(string outputFile) { string outfile = m->getRootName(outputFile) + "sorted.dist.temp"; //use the unix sort - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) string command = "sort -n " + outputFile + " -o " + outfile; system(command.c_str()); #else //sort using windows sort @@ -1023,7 +1161,7 @@ int DistanceCommand::convertToLowerTriangle(string outputFile) { //m->openInputFile(outfile, in); while(!in.eof()) { - if (m->control_pressed) { in.close(); remove(outfile.c_str()); out.close(); return 0; } + if (m->control_pressed) { in.close(); m->mothurRemove(outfile); out.close(); return 0; } in >> first >> second >> dist; m->gobble(in); @@ -1062,7 +1200,7 @@ int DistanceCommand::convertToLowerTriangle(string outputFile) { in.close(); out.close(); - remove(outfile.c_str()); + m->mothurRemove(outfile); return 1; @@ -1072,7 +1210,7 @@ int DistanceCommand::convertToLowerTriangle(string outputFile) { exit(1); } } -/**************************************************************************************************/ +**************************************************************************************************/ //its okay if the column file does not contain all the names in the fasta file, since some distance may have been above a cutoff, //but no sequences can be in the column file that are not in oldfasta. also, if a distance is above the cutoff given then remove it. //also check to make sure the 2 files have the same alignment length. @@ -1133,7 +1271,7 @@ bool DistanceCommand::sanityCheck() { string name1, name2; float dist; while (!inDist.eof()) { - if (m->control_pressed) { inDist.close(); outDist.close(); remove(outputFile.c_str()); return good; } + if (m->control_pressed) { inDist.close(); outDist.close(); m->mothurRemove(outputFile); return good; } inDist >> name1 >> name2 >> dist; m->gobble(inDist); @@ -1150,10 +1288,10 @@ bool DistanceCommand::sanityCheck() { outDist.close(); if (good) { - remove(column.c_str()); + m->mothurRemove(column); rename(outputFile.c_str(), column.c_str()); }else{ - remove(outputFile.c_str()); //temp file is bad because file mismatch above + m->mothurRemove(outputFile); //temp file is bad because file mismatch above } return good;