X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=distancecommand.cpp;h=4cb98419fd23f540e174b068dd9a832b79fb3036;hb=3e2465c16d187247ce3befd29811c2d5dfc15ee8;hp=b3f0a2dd6974b3eb72120a9d6458d55e9eaac6ff;hpb=a98eb683e17d8e49583bf2d215ab7562a4cdca75;p=mothur.git diff --git a/distancecommand.cpp b/distancecommand.cpp index b3f0a2d..4cb9841 100644 --- a/distancecommand.cpp +++ b/distancecommand.cpp @@ -14,8 +14,57 @@ #include "onegapdist.h" #include "onegapignore.h" -//********************************************************************************************************************** +//********************************************************************************************************************** +vector DistanceCommand::getValidParameters(){ + try { + string Array[] = {"fasta","oldfasta","column", "output", "calc", "countends", "cutoff", "processors", "outputdir","inputdir","compress"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "DistanceCommand", "getValidParameters"); + exit(1); + } +} +//********************************************************************************************************************** +DistanceCommand::DistanceCommand(){ + try { + abort = true; + //initialize outputTypes + vector tempOutNames; + outputTypes["phylip"] = tempOutNames; + outputTypes["column"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "DistanceCommand", "DistanceCommand"); + exit(1); + } +} +//********************************************************************************************************************** +vector DistanceCommand::getRequiredParameters(){ + try { + string Array[] = {"fasta"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "DistanceCommand", "getRequiredParameters"); + exit(1); + } +} +//********************************************************************************************************************** +vector DistanceCommand::getRequiredFiles(){ + try { + vector myArray; + return myArray; + } + catch(exception& e) { + m->errorOut(e, "DistanceCommand", "getRequiredFiles"); + exit(1); + } +} +//********************************************************************************************************************** DistanceCommand::DistanceCommand(string option) { try { abort = false; @@ -26,7 +75,8 @@ DistanceCommand::DistanceCommand(string option) { else { //valid paramters for this command - string Array[] = {"fasta","oldfasta","column", "output", "calc", "countends", "cutoff", "processors", "outputdir","inputdir"}; + string Array[] = {"fasta","oldfasta","column", "output", "calc", "countends", "cutoff", "processors", "outputdir","inputdir","compress"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); @@ -40,6 +90,11 @@ DistanceCommand::DistanceCommand(string option) { if (validParameter.isValidParameter(it2->first, myArray, it2->second) != true) { abort = true; } } + //initialize outputTypes + vector tempOutNames; + outputTypes["phylip"] = tempOutNames; + outputTypes["column"] = tempOutNames; + //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; } @@ -114,6 +169,9 @@ DistanceCommand::DistanceCommand(string option) { temp = validParameter.validFile(parameters, "processors", false); if(temp == "not found"){ temp = "1"; } convert(temp, processors); + temp = validParameter.validFile(parameters, "compress", false); if(temp == "not found"){ temp = "F"; } + convert(temp, compress); + output = validParameter.validFile(parameters, "output", false); if(output == "not found"){ output = "column"; } if (((column != "") && (oldfastafile == "")) || ((column == "") && (oldfastafile != ""))) { m->mothurOut("If you provide column or oldfasta, you must provide both."); m->mothurOutEndLine(); abort=true; } @@ -153,20 +211,14 @@ DistanceCommand::DistanceCommand(string option) { //********************************************************************************************************************** -DistanceCommand::~DistanceCommand(){ - - for(int i=0;imothurOut("The dist.seqs command reads a file containing sequences and creates a distance file.\n"); - m->mothurOut("The dist.seqs command parameters are fasta, oldfasta, column, calc, countends, output, cutoff and processors. \n"); + m->mothurOut("The dist.seqs command parameters are fasta, oldfasta, column, calc, countends, output, compress, cutoff and processors. \n"); m->mothurOut("The fasta parameter is required.\n"); m->mothurOut("The oldfasta and column parameters allow you to append the distances calculated to the column file.\n"); m->mothurOut("The calc parameter allows you to specify the method of calculating the distances. Your options are: nogaps, onegap or eachgap. The default is onegap.\n"); @@ -174,6 +226,7 @@ void DistanceCommand::help(){ m->mothurOut("The cutoff parameter allows you to specify maximum distance to keep. The default is 1.0.\n"); m->mothurOut("The output parameter allows you to specify format of your distance matrix. Options are column, lt, and square. The default is column.\n"); m->mothurOut("The processors parameter allows you to specify number of processors to use. The default is 1.\n"); + m->mothurOut("The compress parameter allows you to indicate that you want the resulting distance file compressed. The default is false.\n"); m->mothurOut("The dist.seqs command should be in the following format: \n"); m->mothurOut("dist.seqs(fasta=yourFastaFile, calc=yourCalc, countends=yourEnds, cutoff= yourCutOff, processors=yourProcessors) \n"); m->mothurOut("Example dist.seqs(fasta=amazon.fasta, calc=eachgap, countends=F, cutoff= 2.0, processors=3).\n"); @@ -208,11 +261,12 @@ int DistanceCommand::execute(){ if (output == "lt") { //does the user want lower triangle phylip formatted file outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "phylip.dist"; - remove(outputFile.c_str()); + remove(outputFile.c_str()); outputTypes["phylip"].push_back(outputFile); //output numSeqs to phylip formatted dist file }else if (output == "column") { //user wants column format outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist"; + outputTypes["column"].push_back(outputFile); //so we don't accidentally overwrite if (outputFile == column) { @@ -224,6 +278,7 @@ int DistanceCommand::execute(){ }else { //assume square outputFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "square.dist"; remove(outputFile.c_str()); + outputTypes["phylip"].push_back(outputFile); } @@ -265,25 +320,25 @@ int DistanceCommand::execute(){ driverMPI(start, end, outMPI, cutoff); - if (m->control_pressed) { MPI_File_close(&outMPI); delete distCalculator; return 0; } + if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); delete distCalculator; return 0; } //wait on chidren for(int i = 1; i < processors; i++) { - if (m->control_pressed) { MPI_File_close(&outMPI); delete distCalculator; return 0; } + if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); delete distCalculator; return 0; } - char buf[4]; - MPI_Recv(buf, 4, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); + char buf[5]; + MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); } }else { //you are a child process //do your part driverMPI(start, end, outMPI, cutoff); - if (m->control_pressed) { MPI_File_close(&outMPI); delete distCalculator; return 0; } + if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); delete distCalculator; return 0; } - char buf[4]; + char buf[5]; strcpy(buf, "done"); //tell parent you are done. - MPI_Send(buf, 4, MPI_CHAR, 0, tag, MPI_COMM_WORLD); + MPI_Send(buf, 5, MPI_CHAR, 0, tag, MPI_COMM_WORLD); } MPI_File_close(&outMPI); @@ -298,7 +353,7 @@ int DistanceCommand::execute(){ if (output != "square"){ driverMPI(start, end, outputFile, mySize); } else { driverMPI(start, end, outputFile, mySize, output); } - if (m->control_pressed) { delete distCalculator; return 0; } + if (m->control_pressed) { outputTypes.clear(); delete distCalculator; return 0; } int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; // MPI_File outMPI; @@ -317,7 +372,7 @@ int DistanceCommand::execute(){ for(int b = 1; b < processors; b++) { unsigned long int fileSize; - if (m->control_pressed) { MPI_File_close(&outMPI); delete distCalculator; return 0; } + if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); delete distCalculator; return 0; } MPI_Recv(&fileSize, 1, MPI_LONG, b, tag, MPI_COMM_WORLD, &status); @@ -363,28 +418,35 @@ int DistanceCommand::execute(){ else { driver(0, numSeqs, outputFile, "square"); } }else{ //you have multiple processors + unsigned long int numDists = 0; + + if (output == "square") { + numDists = numSeqs * numSeqs; + }else { + for(int i=0;i processors) { break; } + } + } + } + + if (numDists < processors) { processors = numDists; } + for (int i = 0; i < processors; i++) { - lines.push_back(new linePair()); + distlinePair tempLine; + lines.push_back(tempLine); if (output != "square") { - lines[i]->start = int (sqrt(float(i)/float(processors)) * numSeqs); - lines[i]->end = int (sqrt(float(i+1)/float(processors)) * numSeqs); + lines[i].start = int (sqrt(float(i)/float(processors)) * numSeqs); + lines[i].end = int (sqrt(float(i+1)/float(processors)) * numSeqs); }else{ - lines[i]->start = int ((float(i)/float(processors)) * numSeqs); - lines[i]->end = int ((float(i+1)/float(processors)) * numSeqs); + lines[i].start = int ((float(i)/float(processors)) * numSeqs); + lines[i].end = int ((float(i+1)/float(processors)) * numSeqs); } + } - - createProcesses(outputFile); - - map::iterator it = processIDS.begin(); - rename((outputFile + toString(it->second) + ".temp").c_str(), outputFile.c_str()); - it++; - //append and remove temp files - for (; it != processIDS.end(); it++) { - m->appendFiles((outputFile + toString(it->second) + ".temp"), outputFile); - remove((outputFile + toString(it->second) + ".temp").c_str()); - } + createProcesses(outputFile); } #else //ifstream inFASTA; @@ -393,7 +455,7 @@ int DistanceCommand::execute(){ #endif #endif - if (m->control_pressed) { delete distCalculator; remove(outputFile.c_str()); return 0; } + if (m->control_pressed) { outputTypes.clear(); delete distCalculator; remove(outputFile.c_str()); return 0; } #ifdef USE_MPI MPI_Comm_rank(MPI_COMM_WORLD, &pid); @@ -422,6 +484,13 @@ int DistanceCommand::execute(){ remove(outputFile.c_str()); outputFile = column; } + + if (outputDir != "") { + string newOutputName = outputDir + m->getSimpleName(outputFile); + rename(outputFile.c_str(), newOutputName.c_str()); + remove(outputFile.c_str()); + outputFile = newOutputName; + } } @@ -429,7 +498,7 @@ int DistanceCommand::execute(){ } #endif - if (m->control_pressed) { delete distCalculator; remove(outputFile.c_str()); return 0; } + if (m->control_pressed) { outputTypes.clear(); delete distCalculator; remove(outputFile.c_str()); return 0; } delete distCalculator; @@ -438,6 +507,15 @@ int DistanceCommand::execute(){ m->mothurOut(outputFile); m->mothurOutEndLine(); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - startTime) + " to calculate the distances for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); + + + if (m->isTrue(compress)) { + m->mothurOut("Compressing..."); m->mothurOutEndLine(); + m->mothurOut("(Replacing " + outputFile + " with " + outputFile + ".gz)"); m->mothurOutEndLine(); + system(("gzip -v " + outputFile).c_str()); + outputNames.push_back(outputFile + ".gz"); + }else { outputNames.push_back(outputFile); } + return 0; } @@ -450,7 +528,7 @@ int DistanceCommand::execute(){ void DistanceCommand::createProcesses(string filename) { try { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - int process = 0; + int process = 1; processIDS.clear(); //loop through and create all the processes you want @@ -458,20 +536,36 @@ void DistanceCommand::createProcesses(string filename) { int pid = fork(); if (pid > 0) { - processIDS[lines[process]->end] = pid; //create map from line number to pid so you can append files in correct order later + processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - if (output != "square") { driver(lines[process]->start, lines[process]->end, filename + toString(getpid()) + ".temp", cutoff); } - else { driver(lines[process]->start, lines[process]->end, filename + toString(getpid()) + ".temp", "square"); } + if (output != "square") { driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", cutoff); } + else { driver(lines[process].start, lines[process].end, filename + toString(getpid()) + ".temp", "square"); } exit(0); - }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); } + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes. Error code: " + toString(pid)); m->mothurOutEndLine(); + perror(" : "); + for (int i=0;i::iterator it = processIDS.begin(); it != processIDS.end(); it++) { - int temp = it->second; + for (int i=0;iappendFiles((filename + toString(processIDS[i]) + ".temp"), filename); + remove((filename + toString(processIDS[i]) + ".temp").c_str()); + } #endif } catch(exception& e) { @@ -1059,7 +1153,7 @@ bool DistanceCommand::sanityCheck() { } catch(exception& e) { - m->errorOut(e, "DistanceCommand", "m->appendFiles"); + m->errorOut(e, "DistanceCommand", "sanityCheck"); exit(1); } }