]> git.donarmstrong.com Git - mothur.git/commitdiff
fixed bug with blastdb.cpp
authorwestcott <westcott>
Fri, 3 Jun 2011 17:17:33 +0000 (17:17 +0000)
committerwestcott <westcott>
Fri, 3 Jun 2011 17:17:33 +0000 (17:17 +0000)
blastdb.cpp
chimera.h
chimeraslayer.cpp
chimeraslayer.h
chimeraslayercommand.cpp
pintail.cpp

index 48b09fba5fad87eca53d089ec369ee0149b19ade..a54c9bd135df7b5dc449d0dd435c551c60a99aeb 100644 (file)
@@ -21,9 +21,9 @@ gapOpen(gO), gapExtend(gE), match(mm), misMatch(mM) {
 
                int randNumber = rand();
                //int randNumber = 12345;
-               dbFileName = tag + toString(randNumber) + ".template.unaligned.fasta";
-               queryFileName = tag + toString(randNumber) + ".candidate.unaligned.fasta";
-               blastFileName = tag + toString(randNumber) + ".blast";
+               dbFileName = tag + toString(getpid()) + toString(randNumber) + ".template.unaligned.fasta";
+               queryFileName = tag + toString(getpid()) + toString(randNumber) + ".candidate.unaligned.fasta";
+               blastFileName = tag + toString(getpid()) + toString(randNumber) + ".blast";
                
                //make sure blast exists in the write place
                path = m->argv;
@@ -91,9 +91,9 @@ BlastDB::BlastDB() : Database() {
 
                int randNumber = rand();
                //int randNumber = 12345;
-               dbFileName = toString(randNumber) + ".template.unaligned.fasta";
-               queryFileName = toString(randNumber) + ".candidate.unaligned.fasta";
-               blastFileName = toString(randNumber) + ".blast";
+               dbFileName = toString(randNumber) + toString(getpid()) + ".template.unaligned.fasta";
+               queryFileName = toString(randNumber) + toString(getpid()) + ".candidate.unaligned.fasta";
+               blastFileName = toString(randNumber) + toString(getpid()) + ".blast";
                
                //make sure blast exists in the write place
                path = m->argv;
@@ -181,7 +181,7 @@ vector<int> BlastDB::findClosestSequences(Sequence* seq, int n) {
                
                ofstream queryFile;
                int randNumber = rand();
-               m->openOutputFile((queryFileName+toString(randNumber)), queryFile);
+               m->openOutputFile((queryFileName+toString(getpid())+toString(randNumber)), queryFile);
                queryFile << '>' << seq->getName() << endl;
                queryFile << seq->getUnaligned() << endl;
                queryFile.close();
@@ -195,10 +195,10 @@ vector<int> BlastDB::findClosestSequences(Sequence* seq, int n) {
                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                
                        blastCommand = path + "blast/bin/blastall -p blastn -d " + dbFileName + " -m 8 -W 28 -v " + toString(n) + " -b " + toString(n);
-                       blastCommand += (" -i " + (queryFileName+toString(randNumber)) + " -o " + blastFileName+toString(randNumber));
+                       blastCommand += (" -i " + (queryFileName+toString(getpid())+toString(randNumber)) + " -o " + blastFileName+toString(getpid())+toString(randNumber));
                #else
                        blastCommand =  "\"" + path + "blast\\bin\\blastall\" -p blastn -d " + "\"" + dbFileName + "\"" + " -m 8 -W 28 -v " + toString(n) + " -b " + toString(n);
-                       blastCommand += (" -i " + (queryFileName+toString(randNumber)) + " -o " + blastFileName+toString(randNumber));
+                       blastCommand += (" -i " + (queryFileName+toString(getpid())+toString(randNumber)) + " -o " + blastFileName+toString(getpid())+toString(randNumber));
                        //wrap entire string in ""
                        blastCommand = "\"" + blastCommand + "\"";
                #endif
@@ -206,7 +206,7 @@ vector<int> BlastDB::findClosestSequences(Sequence* seq, int n) {
                system(blastCommand.c_str());
                
                ifstream m8FileHandle;
-               m->openInputFile(blastFileName+toString(randNumber), m8FileHandle, "no error");
+               m->openInputFile(blastFileName+toString(getpid())+toString(randNumber), m8FileHandle, "no error");
                
                string dummy;
                int templateAccession;
@@ -222,8 +222,8 @@ vector<int> BlastDB::findClosestSequences(Sequence* seq, int n) {
                        topMatches.push_back(templateAccession);
                }
                m8FileHandle.close();
-               remove((queryFileName+toString(randNumber)).c_str());
-               remove((blastFileName+toString(randNumber)).c_str());
+               remove((queryFileName+toString(getpid())+toString(randNumber)).c_str());
+               remove((blastFileName+toString(getpid())+toString(randNumber)).c_str());
 
                return topMatches;
        }
@@ -244,7 +244,7 @@ vector<int> BlastDB::findClosestMegaBlast(Sequence* seq, int n, int minPerID) {
                ofstream queryFile;
                int randNumber = rand();
                //int randNumber = 12345;
-               m->openOutputFile((queryFileName+toString(randNumber)), queryFile);
+               m->openOutputFile((queryFileName+toString(getpid())+toString(randNumber)), queryFile);
                queryFile << '>' << seq->getName() << endl;
                queryFile << seq->getUnaligned() << endl;
                queryFile.close();
@@ -256,22 +256,21 @@ vector<int> BlastDB::findClosestMegaBlast(Sequence* seq, int n, int minPerID) {
                string blastCommand;
                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                        blastCommand = path + "blast/bin/megablast -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn
-                       blastCommand += (" -i " + (queryFileName+toString(randNumber)) + " -o " + blastFileName+toString(randNumber));
+                       blastCommand += (" -i " + (queryFileName+toString(getpid())+toString(randNumber)) + " -o " + blastFileName+toString(getpid())+toString(randNumber));
                #else
                //blastCommand = path + "blast\\bin\\megablast -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn
                //blastCommand += (" -i " + (queryFileName+toString(randNumber)) + " -o " + blastFileName+toString(randNumber));
 
                        blastCommand =  "\"" + path + "blast\\bin\\megablast\" -e 1e-10 -d " + "\"" + dbFileName + "\"" + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn
-                       blastCommand += (" -i " + (queryFileName+toString(randNumber)) + " -o " + blastFileName+toString(randNumber));
+                       blastCommand += (" -i " + (queryFileName+toString(getpid())+toString(randNumber)) + " -o " + blastFileName+toString(getpid())+toString(randNumber));
                        //wrap entire string in ""
                        blastCommand = "\"" + blastCommand + "\"";
 
                #endif
-               //cout << blastCommand << endl;
                system(blastCommand.c_str());
 
                ifstream m8FileHandle;
-               m->openInputFile(blastFileName+toString(randNumber), m8FileHandle, "no error");
+               m->openInputFile(blastFileName+toString(getpid())+toString(randNumber), m8FileHandle, "no error");
        
                string dummy, eScore;
                int templateAccession;
@@ -292,8 +291,8 @@ vector<int> BlastDB::findClosestMegaBlast(Sequence* seq, int n, int minPerID) {
 //cout << templateAccession << endl;
                }
                m8FileHandle.close();
-               remove((queryFileName+toString(randNumber)).c_str());
-               remove((blastFileName+toString(randNumber)).c_str());
+               remove((queryFileName+toString(getpid())+toString(randNumber)).c_str());
+               remove((blastFileName+toString(getpid())+toString(randNumber)).c_str());
 //cout << "\n" ;               
                return topMatches;
        }
index 9c5a21995c2acd71516b4e3a9cf53732b10b54f9..7e327dd86bbe92a9ff6280847829134ed059f24a 100644 (file)
--- a/chimera.h
+++ b/chimera.h
@@ -160,6 +160,7 @@ class Chimera {
                virtual Sequence print(ostream&, ostream&){  Sequence temp; return temp; }
                virtual Sequence print(ostream&, ostream&, data_results, data_results) { Sequence temp; return temp; }
                virtual int print(ostream&, ostream&, string){  return 0; }
+               virtual int getNumNoParents(){  return 0; }
                virtual data_results getResults() { data_results results; return results; }
                
                #ifdef USE_MPI
index 97b331b65b556baafbd721cc6d0186097bdfa09e..5d714fd1ee88ff18a00619e96ae253847883a151 100644 (file)
@@ -34,6 +34,7 @@ int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int num
                numWanted = numw;
                realign = r; 
                trimChimera = trim;
+               numNoParents = 0;
        
                doPrep();
        }
@@ -65,6 +66,7 @@ ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, in
                realign = r; 
                trimChimera = trim;
                priority = prior;
+               numNoParents = 0;
                
                createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
                
@@ -1115,6 +1117,8 @@ vector<Sequence> ChimeraSlayer::getBlastSeqs(Sequence q, vector<Sequence*>& db,
                //string qname = q->getName().substr(0, q->getName().find_last_of('_'));        
                //cout << qname << endl;        
                
+               if (mergedResults.size() == 0) { numNoParents++; }
+               
                for (int i = 0; i < mergedResults.size(); i++) {
                        //cout << q->getName() << mergedResults[i]  << '\t' << db[mergedResults[i]]->getName() << endl; 
                        if (db[mergedResults[i]]->getName() != q.getName()) { 
@@ -1127,8 +1131,6 @@ vector<Sequence> ChimeraSlayer::getBlastSeqs(Sequence q, vector<Sequence*>& db,
                delete queryRight;
                delete queryLeft;
                
-               if (refResults.size() == 0) { m->mothurOut("[WARNING]: megablast returned 0 potential parents, so we are not able to check " + q.getName() + ". This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
-               
                return refResults;
        }
        catch(exception& e) {
index 304ea108f3ce76790b5bfa2e3946289eb8d97720..8c3cbb22994cb4956f5752806a76bf5b9b1361df 100644 (file)
@@ -34,6 +34,7 @@ class ChimeraSlayer : public Chimera {
                Sequence print(ostream&, ostream&, data_results, data_results);
                void printHeader(ostream&);
                int doPrep();
+               int getNumNoParents() { return numNoParents; }
                data_results getResults() { return printResults; }
                
                #ifdef USE_MPI
@@ -49,6 +50,7 @@ class ChimeraSlayer : public Chimera {
                Database* databaseLeft;
                map<string, int> priority; //for template=self, seqname, seqAligned, abundance
                set<string> chimericSeqs; //for template=self, so we don't add chimeric sequences to the userTemplate set
+               int numNoParents;
        
                vector<data_struct>  chimeraResults;
                data_results printResults;
index 0e7e19bbda16ff8f4222562c0ffe95f78b335d35..fed36135db3116567dea8b323f9ec0803cd29da8 100644 (file)
@@ -509,6 +509,16 @@ int ChimeraSlayerCommand::execute(){
                                        //do your part
                                        driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos);
                                        
+                                       int numNoParents = chimera->getNumNoParents();
+                                       int temp;
+                                       for(int i = 1; i < processors; i++) { 
+                                               MPI_Recv(&temp, 1, MPI_INT, 1, tag, MPI_COMM_WORLD, &status);
+                                               numNoParents += temp;
+                                       }
+                                       
+                                       
+                                       if (numSeqs == numNoParents) {  m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
+                                       
                                        if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); }  MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); }  remove(outputFileName.c_str());  remove(accnosFileName.c_str());  delete chimera; return 0;  }
 
                                }else{ //you are a child process
@@ -524,6 +534,9 @@ int ChimeraSlayerCommand::execute(){
                                        
                                                //do your part
                                                driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos);
+                                               
+                                               int numNoParents = chimera->getNumNoParents();
+                                               MPI_Send(&numNoParents, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
                                        
                                                if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); }  MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
                                
@@ -556,6 +569,9 @@ int ChimeraSlayerCommand::execute(){
                                if(processors == 1){
                                        numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName, trimFastaFileName);
                                        
+                                       int numNoParents = chimera->getNumNoParents();
+                                       if (numNoParents == numSeqs) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
+                                       
                                        if (m->control_pressed) { outputTypes.clear(); if (trim) { remove(trimFastaFileName.c_str()); } remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {      remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
                                        
                                }else{
@@ -592,6 +608,10 @@ int ChimeraSlayerCommand::execute(){
                        #else
                                numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName, trimFastaFileName);
                                
+                               int numNoParents = chimera->getNumNoParents();
+                               if (numNoParents == numSeqs) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
+
+                               
                                if (m->control_pressed) { outputTypes.clear(); if (trim) { remove(trimFastaFileName.c_str()); } remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {      remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
                                
                        #endif
@@ -670,7 +690,6 @@ int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string f
                        string candidateAligned = candidateSeq->getAligned();
                        
                        if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
-                               
                                if (candidateSeq->getAligned().length() != templateSeqsLength) {  
                                        m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
                                }else{
@@ -906,9 +925,8 @@ int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename
                                ofstream out;
                                string tempFile = outputFileName + toString(getpid()) + ".num.temp";
                                m->openOutputFile(tempFile, out);
-                               out << num << endl;
+                               out << num << '\t' << chimera->getNumNoParents() << endl;
                                out.close();
-                               
                                exit(0);
                        }else { 
                                m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
@@ -923,14 +941,17 @@ int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename
                        wait(&temp);
                }
                
+               int numNoParents = 0;
                for (int i = 0; i < processIDS.size(); i++) {
                        ifstream in;
                        string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
                        m->openInputFile(tempFile, in);
-                       if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+                       if (!in.eof()) { int tempNum = 0; int tempNumParents = 0; in >> tempNum >> tempNumParents; num += tempNum; numNoParents += tempNumParents; }
                        in.close(); remove(tempFile.c_str());
                }
                
+               if (num == numNoParents) {  m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
+               
                return num;
 #endif         
        }
index 10f931f8230d035decdfcc282fe8de248aa0bdac..f793535bc3fb916c403f35288c19d6bbc1957b52 100644 (file)
@@ -290,7 +290,7 @@ Sequence Pintail::print(ostream& out, ostream& outAcc) {
 }
 #ifdef USE_MPI
 //***************************************************************************************************************
-Sequence* Pintail::print(MPI_File& out, MPI_File& outAcc) {
+Sequence Pintail::print(MPI_File& out, MPI_File& outAcc) {
        try {
                
                string outputString = "";
@@ -320,7 +320,7 @@ Sequence* Pintail::print(MPI_File& out, MPI_File& outAcc) {
                        MPI_File_write_shared(outAcc, buf, length, MPI_CHAR, &statusAcc);
                        delete buf;
 
-                       return NULL;
+                       return *querySeq;
                }
                outputString += "Observed\t";
                
@@ -340,7 +340,7 @@ Sequence* Pintail::print(MPI_File& out, MPI_File& outAcc) {
                MPI_File_write_shared(out, buf2, length, MPI_CHAR, &status);
                delete buf2;
                
-               return NULL;
+               return *querySeq;
        }
        catch(exception& e) {
                m->errorOut(e, "Pintail", "print");