From 532c39ca689a9ad7fdff7a81671b2128c71f9671 Mon Sep 17 00:00:00 2001 From: westcott Date: Fri, 3 Jun 2011 17:17:33 +0000 Subject: [PATCH] fixed bug with blastdb.cpp --- blastdb.cpp | 37 ++++++++++++++++++------------------- chimera.h | 1 + chimeraslayer.cpp | 6 ++++-- chimeraslayer.h | 2 ++ chimeraslayercommand.cpp | 29 +++++++++++++++++++++++++---- pintail.cpp | 6 +++--- 6 files changed, 53 insertions(+), 28 deletions(-) diff --git a/blastdb.cpp b/blastdb.cpp index 48b09fb..a54c9bd 100644 --- a/blastdb.cpp +++ b/blastdb.cpp @@ -21,9 +21,9 @@ gapOpen(gO), gapExtend(gE), match(mm), misMatch(mM) { int randNumber = rand(); //int randNumber = 12345; - dbFileName = tag + toString(randNumber) + ".template.unaligned.fasta"; - queryFileName = tag + toString(randNumber) + ".candidate.unaligned.fasta"; - blastFileName = tag + toString(randNumber) + ".blast"; + dbFileName = tag + toString(getpid()) + toString(randNumber) + ".template.unaligned.fasta"; + queryFileName = tag + toString(getpid()) + toString(randNumber) + ".candidate.unaligned.fasta"; + blastFileName = tag + toString(getpid()) + toString(randNumber) + ".blast"; //make sure blast exists in the write place path = m->argv; @@ -91,9 +91,9 @@ BlastDB::BlastDB() : Database() { int randNumber = rand(); //int randNumber = 12345; - dbFileName = toString(randNumber) + ".template.unaligned.fasta"; - queryFileName = toString(randNumber) + ".candidate.unaligned.fasta"; - blastFileName = toString(randNumber) + ".blast"; + dbFileName = toString(randNumber) + toString(getpid()) + ".template.unaligned.fasta"; + queryFileName = toString(randNumber) + toString(getpid()) + ".candidate.unaligned.fasta"; + blastFileName = toString(randNumber) + toString(getpid()) + ".blast"; //make sure blast exists in the write place path = m->argv; @@ -181,7 +181,7 @@ vector BlastDB::findClosestSequences(Sequence* seq, int n) { ofstream queryFile; int randNumber = rand(); - m->openOutputFile((queryFileName+toString(randNumber)), queryFile); + m->openOutputFile((queryFileName+toString(getpid())+toString(randNumber)), queryFile); queryFile << '>' << seq->getName() << endl; queryFile << seq->getUnaligned() << endl; queryFile.close(); @@ -195,10 +195,10 @@ vector BlastDB::findClosestSequences(Sequence* seq, int n) { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) blastCommand = path + "blast/bin/blastall -p blastn -d " + dbFileName + " -m 8 -W 28 -v " + toString(n) + " -b " + toString(n); - blastCommand += (" -i " + (queryFileName+toString(randNumber)) + " -o " + blastFileName+toString(randNumber)); + blastCommand += (" -i " + (queryFileName+toString(getpid())+toString(randNumber)) + " -o " + blastFileName+toString(getpid())+toString(randNumber)); #else blastCommand = "\"" + path + "blast\\bin\\blastall\" -p blastn -d " + "\"" + dbFileName + "\"" + " -m 8 -W 28 -v " + toString(n) + " -b " + toString(n); - blastCommand += (" -i " + (queryFileName+toString(randNumber)) + " -o " + blastFileName+toString(randNumber)); + blastCommand += (" -i " + (queryFileName+toString(getpid())+toString(randNumber)) + " -o " + blastFileName+toString(getpid())+toString(randNumber)); //wrap entire string in "" blastCommand = "\"" + blastCommand + "\""; #endif @@ -206,7 +206,7 @@ vector BlastDB::findClosestSequences(Sequence* seq, int n) { system(blastCommand.c_str()); ifstream m8FileHandle; - m->openInputFile(blastFileName+toString(randNumber), m8FileHandle, "no error"); + m->openInputFile(blastFileName+toString(getpid())+toString(randNumber), m8FileHandle, "no error"); string dummy; int templateAccession; @@ -222,8 +222,8 @@ vector BlastDB::findClosestSequences(Sequence* seq, int n) { topMatches.push_back(templateAccession); } m8FileHandle.close(); - remove((queryFileName+toString(randNumber)).c_str()); - remove((blastFileName+toString(randNumber)).c_str()); + remove((queryFileName+toString(getpid())+toString(randNumber)).c_str()); + remove((blastFileName+toString(getpid())+toString(randNumber)).c_str()); return topMatches; } @@ -244,7 +244,7 @@ vector BlastDB::findClosestMegaBlast(Sequence* seq, int n, int minPerID) { ofstream queryFile; int randNumber = rand(); //int randNumber = 12345; - m->openOutputFile((queryFileName+toString(randNumber)), queryFile); + m->openOutputFile((queryFileName+toString(getpid())+toString(randNumber)), queryFile); queryFile << '>' << seq->getName() << endl; queryFile << seq->getUnaligned() << endl; queryFile.close(); @@ -256,22 +256,21 @@ vector BlastDB::findClosestMegaBlast(Sequence* seq, int n, int minPerID) { string blastCommand; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) blastCommand = path + "blast/bin/megablast -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn - blastCommand += (" -i " + (queryFileName+toString(randNumber)) + " -o " + blastFileName+toString(randNumber)); + blastCommand += (" -i " + (queryFileName+toString(getpid())+toString(randNumber)) + " -o " + blastFileName+toString(getpid())+toString(randNumber)); #else //blastCommand = path + "blast\\bin\\megablast -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn //blastCommand += (" -i " + (queryFileName+toString(randNumber)) + " -o " + blastFileName+toString(randNumber)); blastCommand = "\"" + path + "blast\\bin\\megablast\" -e 1e-10 -d " + "\"" + dbFileName + "\"" + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn - blastCommand += (" -i " + (queryFileName+toString(randNumber)) + " -o " + blastFileName+toString(randNumber)); + blastCommand += (" -i " + (queryFileName+toString(getpid())+toString(randNumber)) + " -o " + blastFileName+toString(getpid())+toString(randNumber)); //wrap entire string in "" blastCommand = "\"" + blastCommand + "\""; #endif - //cout << blastCommand << endl; system(blastCommand.c_str()); ifstream m8FileHandle; - m->openInputFile(blastFileName+toString(randNumber), m8FileHandle, "no error"); + m->openInputFile(blastFileName+toString(getpid())+toString(randNumber), m8FileHandle, "no error"); string dummy, eScore; int templateAccession; @@ -292,8 +291,8 @@ vector BlastDB::findClosestMegaBlast(Sequence* seq, int n, int minPerID) { //cout << templateAccession << endl; } m8FileHandle.close(); - remove((queryFileName+toString(randNumber)).c_str()); - remove((blastFileName+toString(randNumber)).c_str()); + remove((queryFileName+toString(getpid())+toString(randNumber)).c_str()); + remove((blastFileName+toString(getpid())+toString(randNumber)).c_str()); //cout << "\n" ; return topMatches; } diff --git a/chimera.h b/chimera.h index 9c5a219..7e327dd 100644 --- a/chimera.h +++ b/chimera.h @@ -160,6 +160,7 @@ class Chimera { virtual Sequence print(ostream&, ostream&){ Sequence temp; return temp; } virtual Sequence print(ostream&, ostream&, data_results, data_results) { Sequence temp; return temp; } virtual int print(ostream&, ostream&, string){ return 0; } + virtual int getNumNoParents(){ return 0; } virtual data_results getResults() { data_results results; return results; } #ifdef USE_MPI diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp index 97b331b..5d714fd 100644 --- a/chimeraslayer.cpp +++ b/chimeraslayer.cpp @@ -34,6 +34,7 @@ int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int num numWanted = numw; realign = r; trimChimera = trim; + numNoParents = 0; doPrep(); } @@ -65,6 +66,7 @@ ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map ChimeraSlayer::getBlastSeqs(Sequence q, vector& db, //string qname = q->getName().substr(0, q->getName().find_last_of('_')); //cout << qname << endl; + if (mergedResults.size() == 0) { numNoParents++; } + for (int i = 0; i < mergedResults.size(); i++) { //cout << q->getName() << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl; if (db[mergedResults[i]]->getName() != q.getName()) { @@ -1127,8 +1131,6 @@ vector ChimeraSlayer::getBlastSeqs(Sequence q, vector& db, delete queryRight; delete queryLeft; - if (refResults.size() == 0) { m->mothurOut("[WARNING]: megablast returned 0 potential parents, so we are not able to check " + q.getName() + ". This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); } - return refResults; } catch(exception& e) { diff --git a/chimeraslayer.h b/chimeraslayer.h index 304ea10..8c3cbb2 100644 --- a/chimeraslayer.h +++ b/chimeraslayer.h @@ -34,6 +34,7 @@ class ChimeraSlayer : public Chimera { Sequence print(ostream&, ostream&, data_results, data_results); void printHeader(ostream&); int doPrep(); + int getNumNoParents() { return numNoParents; } data_results getResults() { return printResults; } #ifdef USE_MPI @@ -49,6 +50,7 @@ class ChimeraSlayer : public Chimera { Database* databaseLeft; map priority; //for template=self, seqname, seqAligned, abundance set chimericSeqs; //for template=self, so we don't add chimeric sequences to the userTemplate set + int numNoParents; vector chimeraResults; data_results printResults; diff --git a/chimeraslayercommand.cpp b/chimeraslayercommand.cpp index 0e7e19b..fed3613 100644 --- a/chimeraslayercommand.cpp +++ b/chimeraslayercommand.cpp @@ -509,6 +509,16 @@ int ChimeraSlayerCommand::execute(){ //do your part driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos); + int numNoParents = chimera->getNumNoParents(); + int temp; + for(int i = 1; i < processors; i++) { + MPI_Recv(&temp, 1, MPI_INT, 1, tag, MPI_COMM_WORLD, &status); + numNoParents += temp; + } + + + if (numSeqs == numNoParents) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); } + if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } remove(outputFileName.c_str()); remove(accnosFileName.c_str()); delete chimera; return 0; } }else{ //you are a child process @@ -524,6 +534,9 @@ int ChimeraSlayerCommand::execute(){ //do your part driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos); + + int numNoParents = chimera->getNumNoParents(); + MPI_Send(&numNoParents, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; } @@ -556,6 +569,9 @@ int ChimeraSlayerCommand::execute(){ if(processors == 1){ numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName, trimFastaFileName); + int numNoParents = chimera->getNumNoParents(); + if (numNoParents == numSeqs) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); } + if (m->control_pressed) { outputTypes.clear(); if (trim) { remove(trimFastaFileName.c_str()); } remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; } }else{ @@ -592,6 +608,10 @@ int ChimeraSlayerCommand::execute(){ #else numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName, trimFastaFileName); + int numNoParents = chimera->getNumNoParents(); + if (numNoParents == numSeqs) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); } + + if (m->control_pressed) { outputTypes.clear(); if (trim) { remove(trimFastaFileName.c_str()); } remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; } #endif @@ -670,7 +690,6 @@ int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string f string candidateAligned = candidateSeq->getAligned(); if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file - if (candidateSeq->getAligned().length() != templateSeqsLength) { m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine(); }else{ @@ -906,9 +925,8 @@ int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename ofstream out; string tempFile = outputFileName + toString(getpid()) + ".num.temp"; m->openOutputFile(tempFile, out); - out << num << endl; + out << num << '\t' << chimera->getNumNoParents() << endl; out.close(); - exit(0); }else { m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); @@ -923,14 +941,17 @@ int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename wait(&temp); } + int numNoParents = 0; for (int i = 0; i < processIDS.size(); i++) { ifstream in; string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp"; m->openInputFile(tempFile, in); - if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; } + if (!in.eof()) { int tempNum = 0; int tempNumParents = 0; in >> tempNum >> tempNumParents; num += tempNum; numNoParents += tempNumParents; } in.close(); remove(tempFile.c_str()); } + if (num == numNoParents) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); } + return num; #endif } diff --git a/pintail.cpp b/pintail.cpp index 10f931f..f793535 100644 --- a/pintail.cpp +++ b/pintail.cpp @@ -290,7 +290,7 @@ Sequence Pintail::print(ostream& out, ostream& outAcc) { } #ifdef USE_MPI //*************************************************************************************************************** -Sequence* Pintail::print(MPI_File& out, MPI_File& outAcc) { +Sequence Pintail::print(MPI_File& out, MPI_File& outAcc) { try { string outputString = ""; @@ -320,7 +320,7 @@ Sequence* Pintail::print(MPI_File& out, MPI_File& outAcc) { MPI_File_write_shared(outAcc, buf, length, MPI_CHAR, &statusAcc); delete buf; - return NULL; + return *querySeq; } outputString += "Observed\t"; @@ -340,7 +340,7 @@ Sequence* Pintail::print(MPI_File& out, MPI_File& outAcc) { MPI_File_write_shared(out, buf2, length, MPI_CHAR, &status); delete buf2; - return NULL; + return *querySeq; } catch(exception& e) { m->errorOut(e, "Pintail", "print"); -- 2.39.2