]> git.donarmstrong.com Git - mothur.git/blob - blastdb.cpp
b52c82e58d0bb7c441e9b9df324461b4b9a8df60
[mothur.git] / blastdb.cpp
1 /*
2  *  blastdb.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/22/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10
11 #include "database.hpp"
12 #include "sequence.hpp"
13 #include "blastdb.hpp"
14
15 /**************************************************************************************************/
16
17 BlastDB::BlastDB(float gO, float gE, float m, float mM) : Database(), 
18 gapOpen(gO), gapExtend(gE), match(m), misMatch(mM) {
19         
20         globaldata = GlobalData::getInstance();
21         count = 0;
22
23         int randNumber = rand();
24         dbFileName = toString(randNumber) + ".template.unaligned.fasta";
25         queryFileName = toString(randNumber) + ".candidate.unaligned.fasta";
26         blastFileName = toString(randNumber) + ".blast";
27
28 }
29 /**************************************************************************************************/
30
31 BlastDB::BlastDB() : Database() {
32         
33         globaldata = GlobalData::getInstance();
34         count = 0;
35
36         int randNumber = rand();
37         dbFileName = toString(randNumber) + ".template.unaligned.fasta";
38         queryFileName = toString(randNumber) + ".candidate.unaligned.fasta";
39         blastFileName = toString(randNumber) + ".blast";
40
41 }
42
43 /**************************************************************************************************/
44
45 BlastDB::~BlastDB(){
46         remove(queryFileName.c_str());                          //      let's clean stuff up and remove the temp files
47         remove(dbFileName.c_str());                                     //      let's clean stuff up and remove the temp files
48         remove(blastFileName.c_str());                          //      let's clean stuff up and remove the temp files
49 }
50 /**************************************************************************************************/
51 //assumes you have added all the template sequences using the addSequence function and run generateDB.
52 vector<int> BlastDB::findClosestSequences(Sequence* seq, int n) {
53         try{
54                 vector<int> topMatches;
55                 
56                 ofstream queryFile;
57                 openOutputFile(queryFileName, queryFile);
58                 queryFile << '>' << seq->getName() << endl;
59                 queryFile << seq->getUnaligned() << endl;
60                 queryFile.close();
61                                 
62                 //      the goal here is to quickly survey the database to find the closest match.  To do this we are using the default
63                 //      wordsize used in megablast.  I'm sure we're sacrificing accuracy for speed, but anyother way would take way too
64                 //      long.  With this setting, it seems comparable in speed to the suffix tree approach.
65                 
66                 string blastCommand = path + "blast/bin/blastall -p blastn -d " + dbFileName + " -m 8 -W 28 -v " + toString(n) + " -b " + toString(n);;
67                 blastCommand += (" -i " + queryFileName + " -o " + blastFileName);
68                 system(blastCommand.c_str());
69                 
70                 ifstream m8FileHandle;
71                 openInputFile(blastFileName, m8FileHandle);
72                 
73                 string dummy;
74                 int templateAccession;
75                 gobble(m8FileHandle);
76                 
77                 while(!m8FileHandle.eof()){
78                         m8FileHandle >> dummy >> templateAccession >> searchScore;
79                         
80                         //get rest of junk in line
81                         while (!m8FileHandle.eof())     {       char c = m8FileHandle.get(); if (c == 10 || c == 13){   break;  }       } 
82                         
83                         gobble(m8FileHandle);
84                         topMatches.push_back(templateAccession);
85                 }
86                 m8FileHandle.close();
87                 
88                 return topMatches;
89         }
90         catch(exception& e) {
91                 m->errorOut(e, "BlastDB", "findClosestSequences");
92                 exit(1);
93         }
94
95 }
96 /**************************************************************************************************/
97 //assumes you have added all the template sequences using the addSequence function and run generateDB.
98 vector<int> BlastDB::findClosestMegaBlast(Sequence* seq, int n) {
99         try{
100                 vector<int> topMatches;
101                 
102                 ofstream queryFile;
103                 openOutputFile(queryFileName, queryFile);
104                 queryFile << '>' << seq->getName() << endl;
105                 queryFile << seq->getUnaligned() << endl;
106                 queryFile.close();
107                                 
108                 //      the goal here is to quickly survey the database to find the closest match.  To do this we are using the default
109                 //      wordsize used in megablast.  I'm sure we're sacrificing accuracy for speed, but anyother way would take way too
110                 //      long.  With this setting, it seems comparable in speed to the suffix tree approach.
111         
112                 string blastCommand = path + "blast/bin/megablast -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn
113                 blastCommand += (" -i " + queryFileName + " -o " + blastFileName);
114                 system(blastCommand.c_str());
115                 
116                 ifstream m8FileHandle;
117                 openInputFile(blastFileName, m8FileHandle, "no error");
118         
119                 string dummy;
120                 int templateAccession;
121                 gobble(m8FileHandle);
122                 
123                 while(!m8FileHandle.eof()){
124                         m8FileHandle >> dummy >> templateAccession >> searchScore;
125                         
126                         //get rest of junk in line
127                         while (!m8FileHandle.eof())     {       char c = m8FileHandle.get(); if (c == 10 || c == 13){   break;  }       } 
128                         
129                         gobble(m8FileHandle);
130                         topMatches.push_back(templateAccession);
131 //cout << templateAccession << endl;
132                 }
133                 m8FileHandle.close();
134 //cout << "\n\n" ;              
135                 return topMatches;
136         }
137         catch(exception& e) {
138                 m->errorOut(e, "BlastDB", "findClosest");
139                 exit(1);
140         }
141 }
142 /**************************************************************************************************/
143 void BlastDB::addSequence(Sequence seq) {
144         try {
145         
146                 ofstream unalignedFastaFile;
147                 openOutputFileAppend(dbFileName, unalignedFastaFile);                           
148         
149                 //      generating a fasta file with unaligned template
150                 unalignedFastaFile << '>' << count << endl;                                     //      sequences, which will be input to formatdb
151                 unalignedFastaFile << seq.getUnaligned() << endl;
152                 unalignedFastaFile.close();
153         
154                 count++;
155         }
156         catch(exception& e) {
157                 m->errorOut(e, "BlastDB", "addSequence");
158                 exit(1);
159         }
160 }
161 /**************************************************************************************************/
162 void BlastDB::generateDB() {
163         try {
164         
165                 //m->mothurOut("Generating the temporary BLAST database...\t"); cout.flush();
166                 
167                 path = globaldata->argv;
168                 path = path.substr(0, (path.find_last_of('m')));
169         
170                 string formatdbCommand = path + "blast/bin/formatdb -p F -o T -i " + dbFileName;        //      format the database, -o option gives us the ability
171                 system(formatdbCommand.c_str());                                                                //      to get the right sequence names, i think. -p F
172                                                                                                                                         //      option tells formatdb that seqs are DNA, not prot
173                 //m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine(); cout.flush();
174         }
175         catch(exception& e) {
176                 m->errorOut(e, "BlastDB", "generateDB");
177                 exit(1);
178         }
179 }
180 #ifdef USE_MPI  
181 /**************************************************************************************************/
182 int BlastDB::MPISend(int receiver) {
183         try {
184                 
185                 //send gapOpen - float
186                 MPI_Send(&gapOpen, 1, MPI_FLOAT, receiver, 2001, MPI_COMM_WORLD); 
187
188                 //send gapExtend - float
189                 MPI_Send(&gapExtend, 1, MPI_FLOAT, receiver, 2001, MPI_COMM_WORLD); 
190                 
191                 //send match - float
192                 MPI_Send(&match, 1, MPI_FLOAT, receiver, 2001, MPI_COMM_WORLD); 
193                 
194                 //send mismatch - float
195                 MPI_Send(&misMatch, 1, MPI_FLOAT, receiver, 2001, MPI_COMM_WORLD); 
196                                                                         
197                 return 0;
198         }
199         catch(exception& e) {
200                 m->errorOut(e, "BlastDB", "MPISend");
201                 exit(1);
202         }
203 }
204 /**************************************************************************************************/
205 int BlastDB::MPIRecv(int sender) {
206         try {
207                 MPI_Status status;
208                 
209                 //receive gapOpen - float
210                 MPI_Recv(&gapOpen, 1, MPI_FLOAT, sender, 2001, MPI_COMM_WORLD, &status);
211                 
212                 //receive gapExtend - float
213                 MPI_Recv(&gapExtend, 1, MPI_FLOAT, sender, 2001, MPI_COMM_WORLD, &status);
214                                 
215                 //receive match - float
216                 MPI_Recv(&match, 1, MPI_FLOAT, sender, 2001, MPI_COMM_WORLD, &status);
217                 
218                 //receive mismatch - float
219                 MPI_Recv(&misMatch, 1, MPI_FLOAT, sender, 2001, MPI_COMM_WORLD, &status);               
220                 
221                 return 0;
222         }
223         catch(exception& e) {
224                 m->errorOut(e, "BlastDB", "MPIRecv");
225                 exit(1);
226         }
227 }
228 #endif  
229 /**************************************************************************************************/
230
231 /**************************************************************************************************/
232