]> git.donarmstrong.com Git - mothur.git/blob - blastdb.cpp
added MPI code, broke up chimera.seqs into 5 separated commands, added parse.sff...
[mothur.git] / blastdb.cpp
1 /*
2  *  blastdb.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/22/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10
11 #include "database.hpp"
12 #include "sequence.hpp"
13 #include "blastdb.hpp"
14
15 /**************************************************************************************************/
16
17 BlastDB::BlastDB(float gO, float gE, float m, float mM) : Database(), 
18 gapOpen(gO), gapExtend(gE), match(m), misMatch(mM) {
19         
20         globaldata = GlobalData::getInstance();
21         count = 0;
22
23         int randNumber = rand();
24         dbFileName = toString(randNumber) + ".template.unaligned.fasta";
25         queryFileName = toString(randNumber) + ".candidate.unaligned.fasta";
26         blastFileName = toString(randNumber) + ".blast";
27
28 }
29 /**************************************************************************************************/
30
31 BlastDB::BlastDB() : Database() {
32         
33         globaldata = GlobalData::getInstance();
34         count = 0;
35
36         int randNumber = rand();
37         dbFileName = toString(randNumber) + ".template.unaligned.fasta";
38         queryFileName = toString(randNumber) + ".candidate.unaligned.fasta";
39         blastFileName = toString(randNumber) + ".blast";
40
41 }
42
43 /**************************************************************************************************/
44
45 BlastDB::~BlastDB(){
46         remove(queryFileName.c_str());                          //      let's clean stuff up and remove the temp files
47         remove(dbFileName.c_str());                                     //      let's clean stuff up and remove the temp files
48         remove(blastFileName.c_str());                          //      let's clean stuff up and remove the temp files
49 }
50 /**************************************************************************************************/
51 //assumes you have added all the template sequences using the addSequence function and run generateDB.
52 vector<int> BlastDB::findClosestSequences(Sequence* seq, int n) {
53         try{
54                 vector<int> topMatches;
55                 
56                 ofstream queryFile;
57                 openOutputFile(queryFileName, queryFile);
58                 queryFile << '>' << seq->getName() << endl;
59                 queryFile << seq->getUnaligned() << endl;
60                 queryFile.close();
61                                 
62                 //      the goal here is to quickly survey the database to find the closest match.  To do this we are using the default
63                 //      wordsize used in megablast.  I'm sure we're sacrificing accuracy for speed, but anyother way would take way too
64                 //      long.  With this setting, it seems comparable in speed to the suffix tree approach.
65                 
66                 string blastCommand = path + "blast/bin/blastall -p blastn -d " + dbFileName + " -m 8 -W 28 -v " + toString(n) + " -b " + toString(n);;
67                 blastCommand += (" -i " + queryFileName + " -o " + blastFileName);
68                 system(blastCommand.c_str());
69                 
70                 ifstream m8FileHandle;
71                 openInputFile(blastFileName, m8FileHandle);
72                 
73                 string dummy;
74                 int templateAccession;
75                 gobble(m8FileHandle);
76                 
77                 while(!m8FileHandle.eof()){
78                         m8FileHandle >> dummy >> templateAccession >> searchScore;
79                         
80                         //get rest of junk in line
81                         while (!m8FileHandle.eof())     {       char c = m8FileHandle.get(); if (c == 10 || c == 13){   break;  }       } 
82                         
83                         gobble(m8FileHandle);
84                         topMatches.push_back(templateAccession);
85                 }
86                 m8FileHandle.close();
87                 
88                 string root = dbFileName;
89                 string temp = dbFileName + ".nsq";
90                 remove(temp.c_str());   
91                 temp = dbFileName + ".nsi";
92                 remove(temp.c_str());
93                 
94                 temp = dbFileName + ".nsd";
95                 remove(temp.c_str());   
96
97                 temp = dbFileName + ".nin";
98                 remove(temp.c_str());   
99
100                 temp = dbFileName + ".nhr";
101                 remove(temp.c_str());   
102         
103
104                 
105                 return topMatches;
106         }
107         catch(exception& e) {
108                 m->errorOut(e, "BlastDB", "findClosestSequences");
109                 exit(1);
110         }
111
112 }
113 /**************************************************************************************************/
114 //assumes you have added all the template sequences using the addSequence function and run generateDB.
115 vector<int> BlastDB::findClosestMegaBlast(Sequence* seq, int n) {
116         try{
117                 vector<int> topMatches;
118                 
119                 ofstream queryFile;
120                 openOutputFile(queryFileName, queryFile);
121                 queryFile << '>' << seq->getName() << endl;
122                 queryFile << seq->getUnaligned() << endl;
123                 queryFile.close();
124                                 
125                 //      the goal here is to quickly survey the database to find the closest match.  To do this we are using the default
126                 //      wordsize used in megablast.  I'm sure we're sacrificing accuracy for speed, but anyother way would take way too
127                 //      long.  With this setting, it seems comparable in speed to the suffix tree approach.
128         
129                 string blastCommand = path + "blast/bin/megablast -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn
130                 blastCommand += (" -i " + queryFileName + " -o " + blastFileName);
131                 system(blastCommand.c_str());
132                 
133                 ifstream m8FileHandle;
134                 openInputFile(blastFileName, m8FileHandle, "no error");
135         
136                 string dummy;
137                 int templateAccession;
138                 gobble(m8FileHandle);
139                 
140                 while(!m8FileHandle.eof()){
141                         m8FileHandle >> dummy >> templateAccession >> searchScore;
142                         
143                         //get rest of junk in line
144                         while (!m8FileHandle.eof())     {       char c = m8FileHandle.get(); if (c == 10 || c == 13){   break;  }       } 
145                         
146                         gobble(m8FileHandle);
147                         topMatches.push_back(templateAccession);
148 //cout << templateAccession << endl;
149                 }
150                 m8FileHandle.close();
151 //cout << "\n\n" ;              
152                 return topMatches;
153         }
154         catch(exception& e) {
155                 m->errorOut(e, "BlastDB", "findClosest");
156                 exit(1);
157         }
158 }
159 /**************************************************************************************************/
160 void BlastDB::addSequence(Sequence seq) {
161         try {
162         
163                 ofstream unalignedFastaFile;
164                 openOutputFileAppend(dbFileName, unalignedFastaFile);                           
165         
166                 //      generating a fasta file with unaligned template
167                 unalignedFastaFile << '>' << count << endl;                                     //      sequences, which will be input to formatdb
168                 unalignedFastaFile << seq.getUnaligned() << endl;
169                 unalignedFastaFile.close();
170                 
171                 count++;
172         }
173         catch(exception& e) {
174                 m->errorOut(e, "BlastDB", "addSequence");
175                 exit(1);
176         }
177 }
178 /**************************************************************************************************/
179 void BlastDB::generateDB() {
180         try {
181         
182                 //m->mothurOut("Generating the temporary BLAST database...\t"); cout.flush();
183                 
184                 path = globaldata->argv;
185                 path = path.substr(0, (path.find_last_of('m')));
186         
187                 string formatdbCommand = path + "blast/bin/formatdb -p F -o T -i " + dbFileName;        //      format the database, -o option gives us the ability
188                 system(formatdbCommand.c_str());                                                                //      to get the right sequence names, i think. -p F
189                                                                                                                                         //      option tells formatdb that seqs are DNA, not prot
190                 //m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine(); cout.flush();
191         }
192         catch(exception& e) {
193                 m->errorOut(e, "BlastDB", "generateDB");
194                 exit(1);
195         }
196 }
197 #ifdef USE_MPI  
198 /**************************************************************************************************/
199 int BlastDB::MPISend(int receiver) {
200         try {
201                 
202                 //send gapOpen - float
203                 MPI_Send(&gapOpen, 1, MPI_FLOAT, receiver, 2001, MPI_COMM_WORLD); 
204
205                 //send gapExtend - float
206                 MPI_Send(&gapExtend, 1, MPI_FLOAT, receiver, 2001, MPI_COMM_WORLD); 
207                 
208                 //send match - float
209                 MPI_Send(&match, 1, MPI_FLOAT, receiver, 2001, MPI_COMM_WORLD); 
210                 
211                 //send mismatch - float
212                 MPI_Send(&misMatch, 1, MPI_FLOAT, receiver, 2001, MPI_COMM_WORLD); 
213                                                                         
214                 return 0;
215         }
216         catch(exception& e) {
217                 m->errorOut(e, "BlastDB", "MPISend");
218                 exit(1);
219         }
220 }
221 /**************************************************************************************************/
222 int BlastDB::MPIRecv(int sender) {
223         try {
224                 MPI_Status status;
225                 
226                 //receive gapOpen - float
227                 MPI_Recv(&gapOpen, 1, MPI_FLOAT, sender, 2001, MPI_COMM_WORLD, &status);
228                 
229                 //receive gapExtend - float
230                 MPI_Recv(&gapExtend, 1, MPI_FLOAT, sender, 2001, MPI_COMM_WORLD, &status);
231                                 
232                 //receive match - float
233                 MPI_Recv(&match, 1, MPI_FLOAT, sender, 2001, MPI_COMM_WORLD, &status);
234                 
235                 //receive mismatch - float
236                 MPI_Recv(&misMatch, 1, MPI_FLOAT, sender, 2001, MPI_COMM_WORLD, &status);               
237                 
238                 return 0;
239         }
240         catch(exception& e) {
241                 m->errorOut(e, "BlastDB", "MPIRecv");
242                 exit(1);
243         }
244 }
245 #endif  
246 /**************************************************************************************************/
247
248 /**************************************************************************************************/
249