X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=distancecommand.cpp;h=9243df3873a9687d43da9da940b32da1ce9a5091;hp=beea1bf915fccffa6b1f92e212e8125db6010af9;hb=cf9987b67aa49777a4c91c2d21f96e58bf17aa82;hpb=ae57e166b2ed7b475ec3f466106bd76fabadd063 diff --git a/distancecommand.cpp b/distancecommand.cpp index beea1bf..9243df3 100644 --- a/distancecommand.cpp +++ b/distancecommand.cpp @@ -12,17 +12,17 @@ //********************************************************************************************************************** vector DistanceCommand::setParameters(){ try { - CommandParameter pcolumn("column", "InputTypes", "", "", "none", "none", "OldFastaColumn",false,false); parameters.push_back(pcolumn); - CommandParameter poldfasta("oldfasta", "InputTypes", "", "", "none", "none", "OldFastaColumn",false,false); parameters.push_back(poldfasta); - CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta); - CommandParameter poutput("output", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(poutput); - CommandParameter pcalc("calc", "Multiple", "nogaps-eachgap-onegap", "onegap", "", "", "",false,false); parameters.push_back(pcalc); - CommandParameter pcountends("countends", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pcountends); - CommandParameter pcompress("compress", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcompress); - CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); - CommandParameter pcutoff("cutoff", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pcutoff); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter pcolumn("column", "InputTypes", "", "", "none", "none", "OldFastaColumn","column",false,false); parameters.push_back(pcolumn); + CommandParameter poldfasta("oldfasta", "InputTypes", "", "", "none", "none", "OldFastaColumn","",false,false); parameters.push_back(poldfasta); + CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","phylip-column",false,true, true); parameters.push_back(pfasta); + CommandParameter poutput("output", "Multiple", "column-lt-square-phylip", "column", "", "", "","phylip-column",false,false, true); parameters.push_back(poutput); + CommandParameter pcalc("calc", "Multiple", "nogaps-eachgap-onegap", "onegap", "", "", "","",false,false); parameters.push_back(pcalc); + CommandParameter pcountends("countends", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pcountends); + CommandParameter pcompress("compress", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pcompress); + CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false, true); parameters.push_back(pprocessors); + CommandParameter pcutoff("cutoff", "Number", "", "1.0", "", "", "","",false,false, true); parameters.push_back(pcutoff); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -59,6 +59,22 @@ string DistanceCommand::getHelpString(){ } } //********************************************************************************************************************** +string DistanceCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "phylip") { pattern = "[filename],[outputtag],dist"; } + else if (type == "column") { pattern = "[filename],dist"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "DistanceCommand", "getOutputPattern"); + exit(1); + } +} +//********************************************************************************************************************** DistanceCommand::DistanceCommand(){ try { abort = true; calledHelp = true; @@ -179,16 +195,17 @@ DistanceCommand::DistanceCommand(string option) { convert(temp, countends); temp = validParameter.validFile(parameters, "cutoff", false); if(temp == "not found"){ temp = "1.0"; } - convert(temp, cutoff); + m->mothurConvert(temp, cutoff); temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } m->setProcessors(temp); - convert(temp, processors); + m->mothurConvert(temp, processors); temp = validParameter.validFile(parameters, "compress", false); if(temp == "not found"){ temp = "F"; } convert(temp, compress); output = validParameter.validFile(parameters, "output", false); if(output == "not found"){ output = "column"; } + if (output == "phylip") { output = "lt"; } if (((column != "") && (oldfastafile == "")) || ((column == "") && (oldfastafile != ""))) { m->mothurOut("If you provide column or oldfasta, you must provide both."); m->mothurOutEndLine(); abort=true; } @@ -227,14 +244,17 @@ int DistanceCommand::execute(){ if (!alignDB.sameLength()) { m->mothurOut("[ERROR]: your sequences are not the same length, aborting."); m->mothurOutEndLine(); return 0; } string outputFile; - + + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile)); if (output == "lt") { //does the user want lower triangle phylip formatted file - outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "phylip.dist"; + variables["[outputtag]"] = "phylip"; + outputFile = getOutputFileName("phylip", variables); m->mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile); //output numSeqs to phylip formatted dist file }else if (output == "column") { //user wants column format - outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist"; + outputFile = getOutputFileName("column", variables); outputTypes["column"].push_back(outputFile); //so we don't accidentally overwrite @@ -245,7 +265,8 @@ int DistanceCommand::execute(){ m->mothurRemove(outputFile); }else { //assume square - outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "square.dist"; + variables["[outputtag]"] = "square"; + outputFile = getOutputFileName("phylip", variables); m->mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile); } @@ -289,11 +310,11 @@ int DistanceCommand::execute(){ driverMPI(start, end, outMPI, cutoff); - if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); delete distCalculator; return 0; } + if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); return 0; } //wait on chidren for(int i = 1; i < processors; i++) { - if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); delete distCalculator; return 0; } + if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); return 0; } char buf[5]; MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); @@ -302,7 +323,7 @@ int DistanceCommand::execute(){ //do your part driverMPI(start, end, outMPI, cutoff); - if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); delete distCalculator; return 0; } + if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); return 0; } char buf[5]; strcpy(buf, "done"); @@ -322,7 +343,7 @@ int DistanceCommand::execute(){ if (output != "square"){ driverMPI(start, end, outputFile, mySize); } else { driverMPI(start, end, outputFile, mySize, output); } - if (m->control_pressed) { outputTypes.clear(); delete distCalculator; return 0; } + if (m->control_pressed) { outputTypes.clear(); return 0; } int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; // MPI_File outMPI; @@ -341,7 +362,7 @@ int DistanceCommand::execute(){ for(int b = 1; b < processors; b++) { unsigned long long fileSize; - if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); delete distCalculator; return 0; } + if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); return 0; } MPI_Recv(&fileSize, 1, MPI_LONG, b, tag, MPI_COMM_WORLD, &status); @@ -371,7 +392,7 @@ int DistanceCommand::execute(){ if (output != "square"){ driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size); } else { driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size, output); } - if (m->control_pressed) { delete distCalculator; return 0; } + if (m->control_pressed) { return 0; } //tell parent you are done. MPI_Send(&size, 1, MPI_LONG, 0, tag, MPI_COMM_WORLD); @@ -380,7 +401,7 @@ int DistanceCommand::execute(){ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else - //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //if you don't need to fork anything if(processors == 1){ if (output != "square") { driver(0, numSeqs, outputFile, cutoff); } @@ -483,7 +504,7 @@ int DistanceCommand::execute(){ } m->mothurOutEndLine(); - m->mothurOut("Output File Name: "); m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); m->mothurOut(outputFile); m->mothurOutEndLine(); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - startTime) + " to calculate the distances for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); @@ -507,7 +528,7 @@ int DistanceCommand::execute(){ /**************************************************************************************************/ void DistanceCommand::createProcesses(string filename) { try { -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) int process = 1; processIDS.clear(); @@ -573,6 +594,9 @@ void DistanceCommand::createProcesses(string filename) { //Close all thread handles and free memory allocations. for(int i=0; i < pDataArray.size(); i++){ + if (pDataArray[i]->count != (pDataArray[i]->endLine-pDataArray[i]->startLine)) { + m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->endLine-pDataArray[i]->startLine) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; + } CloseHandle(hThreadArray[i]); delete pDataArray[i]; } @@ -651,11 +675,11 @@ int DistanceCommand::driver(int startLine, int endLine, string dFileName, float if (output == "lt") { outFile << endl; } if(i % 100 == 0){ - m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); + m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n"); } } - m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); + m->mothurOutJustToScreen(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)+"\n"); outFile.close(); delete distCalculator; @@ -721,11 +745,11 @@ int DistanceCommand::driver(int startLine, int endLine, string dFileName, string outFile << endl; if(i % 100 == 0){ - m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); + m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n"); } } - m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); + m->mothurOutJustToScreen(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)+"\n"); outFile.close(); delete distCalculator; @@ -788,8 +812,7 @@ int DistanceCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, flo } if(i % 100 == 0){ - //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); - cout << i << '\t' << (time(NULL) - startTime) << endl; + m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n"); } @@ -805,8 +828,7 @@ int DistanceCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, flo } - //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); - cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl; + m->mothurOutJustToScreen(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)+"\n"); delete distCalculator; return 1; } @@ -883,9 +905,7 @@ int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned if(i % 100 == 0){ - //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); - cout << i << '\t' << (time(NULL) - startTime) << endl; - } + m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n"); } //send results to parent @@ -899,8 +919,8 @@ int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned delete buf; } - //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); - cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl; + m->mothurOutJustToScreen(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)+"\n"); + MPI_File_close(&outMPI); delete distCalculator; @@ -978,8 +998,7 @@ int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned if(i % 100 == 0){ - //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); - cout << i << '\t' << (time(NULL) - startTime) << endl; + m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n"); } @@ -994,8 +1013,8 @@ int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned delete buf; } - //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); - cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl; + m->mothurOutJustToScreen(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)+"\n"); + MPI_File_close(&outMPI); delete distCalculator; return 1; @@ -1014,7 +1033,7 @@ int DistanceCommand::convertMatrix(string outputFile) { string outfile = m->getRootName(outputFile) + "sorted.dist.temp"; //use the unix sort - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) string command = "sort -n " + outputFile + " -o " + outfile; system(command.c_str()); #else //sort using windows sort @@ -1094,7 +1113,7 @@ int DistanceCommand::convertMatrix(string outputFile) { exit(1); } } -/************************************************************************************************** +************************************************************************************************** int DistanceCommand::convertToLowerTriangle(string outputFile) { try{ @@ -1102,7 +1121,7 @@ int DistanceCommand::convertToLowerTriangle(string outputFile) { string outfile = m->getRootName(outputFile) + "sorted.dist.temp"; //use the unix sort - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) string command = "sort -n " + outputFile + " -o " + outfile; system(command.c_str()); #else //sort using windows sort @@ -1188,7 +1207,7 @@ int DistanceCommand::convertToLowerTriangle(string outputFile) { exit(1); } } -/**************************************************************************************************/ +**************************************************************************************************/ //its okay if the column file does not contain all the names in the fasta file, since some distance may have been above a cutoff, //but no sequences can be in the column file that are not in oldfasta. also, if a distance is above the cutoff given then remove it. //also check to make sure the 2 files have the same alignment length.