X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=classifyseqscommand.cpp;h=bbdf8119ad9332f49be4b1c71679b50fc51fe68d;hb=15cde0905641f8adddc1cc704f8c064f760e7461;hp=e934d9d6d114a18759ec317185cd01085ac72052;hpb=260ae19c36cb11a53ddc5a75b5e507f8dd8b31d6;p=mothur.git diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp index e934d9d..bbdf811 100644 --- a/classifyseqscommand.cpp +++ b/classifyseqscommand.cpp @@ -14,8 +14,58 @@ #include "phylosummary.h" #include "knn.h" -//********************************************************************************************************************** +//********************************************************************************************************************** +vector ClassifySeqsCommand::getValidParameters(){ + try { + string AlignArray[] = {"template","fasta","name","group","search","ksize","method","processors","taxonomy","match","mismatch","gapopen","gapextend","numwanted","cutoff","probs","iters", "outputdir","inputdir"}; + vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "ClassifySeqsCommand", "getValidParameters"); + exit(1); + } +} +//********************************************************************************************************************** +ClassifySeqsCommand::ClassifySeqsCommand(){ + try { + abort = true; + //initialize outputTypes + vector tempOutNames; + outputTypes["taxonomy"] = tempOutNames; + outputTypes["taxsummary"] = tempOutNames; + outputTypes["matchdist"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "ClassifySeqsCommand", "ClassifySeqsCommand"); + exit(1); + } +} +//********************************************************************************************************************** +vector ClassifySeqsCommand::getRequiredParameters(){ + try { + string Array[] = {"fasta","template","taxonomy"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "ClassifySeqsCommand", "getRequiredParameters"); + exit(1); + } +} +//********************************************************************************************************************** +vector ClassifySeqsCommand::getRequiredFiles(){ + try { + vector myArray; + return myArray; + } + catch(exception& e) { + m->errorOut(e, "ClassifySeqsCommand", "getRequiredFiles"); + exit(1); + } +} +//********************************************************************************************************************** ClassifySeqsCommand::ClassifySeqsCommand(string option) { try { abort = false; @@ -40,6 +90,12 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option) { if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } } + //initialize outputTypes + vector tempOutNames; + outputTypes["taxonomy"] = tempOutNames; + outputTypes["taxsummary"] = tempOutNames; + outputTypes["matchdist"] = tempOutNames; + //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } @@ -106,10 +162,24 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option) { if (m->getDefaultPath() != "") { //default path is set string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]); m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); - ableToOpen = m->openInputFile(tryPath, in, "noerror"); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + fastaFileNames[i] = tryPath; + } + } + + if (ableToOpen == 1) { + if (m->getOutputDir() != "") { //default path is set + string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]); + m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); fastaFileNames[i] = tryPath; } } + in.close(); if (ableToOpen == 1) { @@ -158,7 +228,20 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option) { if (m->getDefaultPath() != "") { //default path is set string tryPath = m->getDefaultPath() + m->getSimpleName(namefileNames[i]); m->mothurOut("Unable to open " + namefileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); - ableToOpen = m->openInputFile(tryPath, in, "noerror"); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + namefileNames[i] = tryPath; + } + } + + if (ableToOpen == 1) { + if (m->getOutputDir() != "") { //default path is set + string tryPath = m->getOutputDir() + m->getSimpleName(namefileNames[i]); + m->mothurOut("Unable to open " + namefileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); namefileNames[i] = tryPath; } } @@ -200,10 +283,24 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option) { if (m->getDefaultPath() != "") { //default path is set string tryPath = m->getDefaultPath() + m->getSimpleName(groupfileNames[i]); m->mothurOut("Unable to open " + groupfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); - ableToOpen = m->openInputFile(tryPath, in, "noerror"); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + groupfileNames[i] = tryPath; + } + } + + if (ableToOpen == 1) { + if (m->getOutputDir() != "") { //default path is set + string tryPath = m->getOutputDir() + m->getSimpleName(groupfileNames[i]); + m->mothurOut("Unable to open " + groupfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); groupfileNames[i] = tryPath; } } + in.close(); if (ableToOpen == 1) { @@ -336,7 +433,6 @@ int ClassifySeqsCommand::execute(){ if (m->control_pressed) { delete classify; return 0; } - vector outputNames; for (int s = 0; s < fastaFileNames.size(); s++) { @@ -354,11 +450,11 @@ int ClassifySeqsCommand::execute(){ if ((method == "knn") && (search == "distance")) { string DistName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "match.dist"; - classify->setDistName(DistName); outputNames.push_back(DistName); + classify->setDistName(DistName); outputNames.push_back(DistName); outputTypes["matchdist"].push_back(DistName); } - outputNames.push_back(newTaxonomyFile); - outputNames.push_back(taxSummary); + outputNames.push_back(newTaxonomyFile); outputTypes["taxonomy"].push_back(newTaxonomyFile); + outputNames.push_back(taxSummary); outputTypes["taxsummary"].push_back(taxSummary); int start = time(NULL); int numFastaSeqs = 0; @@ -380,20 +476,11 @@ int ClassifySeqsCommand::execute(){ int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; int inMode=MPI_MODE_RDONLY; - //char* outNewTax = new char[newTaxonomyFile.length()]; - //memcpy(outNewTax, newTaxonomyFile.c_str(), newTaxonomyFile.length()); - char outNewTax[1024]; strcpy(outNewTax, newTaxonomyFile.c_str()); - - //char* outTempTax = new char[tempTaxonomyFile.length()]; - //memcpy(outTempTax, tempTaxonomyFile.c_str(), tempTaxonomyFile.length()); char outTempTax[1024]; strcpy(outTempTax, tempTaxonomyFile.c_str()); - - //char* inFileName = new char[fastaFileNames[s].length()]; - //memcpy(inFileName, fastaFileNames[s].c_str(), fastaFileNames[s].length()); char inFileName[1024]; strcpy(inFileName, fastaFileNames[s].c_str()); @@ -402,11 +489,7 @@ int ClassifySeqsCommand::execute(){ MPI_File_open(MPI_COMM_WORLD, outNewTax, outMode, MPI_INFO_NULL, &outMPINewTax); MPI_File_open(MPI_COMM_WORLD, outTempTax, outMode, MPI_INFO_NULL, &outMPITempTax); - //delete outNewTax; - //delete outTempTax; - //delete inFileName; - - if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPINewTax); MPI_File_close(&outMPITempTax); delete classify; return 0; } + if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPINewTax); MPI_File_close(&outMPITempTax); delete classify; return 0; } if (pid == 0) { //you are the root process @@ -427,7 +510,7 @@ int ClassifySeqsCommand::execute(){ //align your part driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPINewTax, outMPITempTax, MPIPos); - if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPINewTax); MPI_File_close(&outMPITempTax); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } delete classify; return 0; } + if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPINewTax); MPI_File_close(&outMPITempTax); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } delete classify; return 0; } for (int i = 1; i < processors; i++) { int done; @@ -447,7 +530,7 @@ int ClassifySeqsCommand::execute(){ //align your part driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPINewTax, outMPITempTax, MPIPos); - if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPINewTax); MPI_File_close(&outMPITempTax); delete classify; return 0; } + if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPINewTax); MPI_File_close(&outMPITempTax); delete classify; return 0; } int done = 0; MPI_Send(&done, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); @@ -476,16 +559,6 @@ int ClassifySeqsCommand::execute(){ numFastaSeqs = createProcesses(newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]); - rename((newTaxonomyFile + toString(processIDS[0]) + ".temp").c_str(), newTaxonomyFile.c_str()); - rename((tempTaxonomyFile + toString(processIDS[0]) + ".temp").c_str(), tempTaxonomyFile.c_str()); - - for(int i=1;icontrol_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } delete classify; return 0; } + if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } delete classify; return 0; } if (namefile == "") { taxaSum.summarize(tempTaxonomyFile); } else { @@ -561,7 +634,7 @@ int ClassifySeqsCommand::execute(){ } remove(tempTaxonomyFile.c_str()); - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } delete classify; return 0; } + if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } delete classify; return 0; } //print summary file ofstream outTaxTree; @@ -579,11 +652,11 @@ int ClassifySeqsCommand::execute(){ //get maxLevel from phylotree so you know how many 'unclassified's to add int maxLevel = taxaSum.getMaxLevel(); - + //read taxfile - this reading and rewriting is done to preserve the confidence scores. string name, taxon; while (!inTax.eof()) { - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } remove(unclass.c_str()); delete classify; return 0; } + if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } remove(unclass.c_str()); delete classify; return 0; } inTax >> name >> taxon; m->gobble(inTax); @@ -653,7 +726,7 @@ string ClassifySeqsCommand::addUnclassifieds(string tax, int maxlevel) { int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile, string filename) { try { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - int process = 0; + int process = 1; int num = 0; //loop through and create all the processes you want @@ -674,11 +747,18 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile, out.close(); exit(0); - }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); } + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); + for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } + exit(0); + } } + //parent does its part + num = driver(lines[0], taxFileName, tempTaxFile, filename); + //force parent to wait until all the processes are done - for (int i=0;icontrol_pressed) { return 0; } Sequence* candidateSeq = new Sequence(inFASTA); m->gobble(inFASTA); - + if (candidateSeq->getName() != "") { + taxonomy = classify->getTaxonomy(candidateSeq); if (m->control_pressed) { delete candidateSeq; return 0; } @@ -767,15 +855,20 @@ int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempT } delete candidateSeq; - unsigned long int pos = inFASTA.tellg(); - if ((pos == -1) || (pos >= filePos->end)) { break; } + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + unsigned long int pos = inFASTA.tellg(); + if ((pos == -1) || (pos >= filePos->end)) { break; } + #else + if (inFASTA.eof()) { break; } + #endif //report progress if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); } + } //report progress if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); } - + inFASTA.close(); outTax.close(); outTaxSimple.close();