]> git.donarmstrong.com Git - mothur.git/blob - blastdb.cpp
6f4ebcc1fa84028113e150c39e028e340ae86c26
[mothur.git] / blastdb.cpp
1 /*
2  *  blastdb.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/22/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10
11 #include "database.hpp"
12 #include "sequence.hpp"
13 #include "blastdb.hpp"
14
15 /**************************************************************************************************/
16
17 BlastDB::BlastDB(string fastaFileName, float gO, float gE, float m, float mM) : Database(fastaFileName), 
18 gapOpen(gO), gapExtend(gE), match(m), misMatch(mM) {
19         
20         globaldata = GlobalData::getInstance();
21         
22         cout << "Generating the temporary BLAST database...\t"; cout.flush();
23
24         int randNumber = rand();
25         dbFileName = toString(randNumber) + ".template.unaligned.fasta";
26         queryFileName = toString(randNumber) + ".candidate.unaligned.fasta";
27         blastFileName = toString(randNumber) + ".blast";
28
29
30         ofstream unalignedFastaFile;
31         openOutputFile(dbFileName, unalignedFastaFile);                         
32         
33         for(int i=0;i<numSeqs;i++){                                                                     //      generating a fasta file with unaligned template
34                 unalignedFastaFile << '>' << i << endl;                                 //      sequences, which will be input to formatdb
35                 unalignedFastaFile << templateSequences[i]->getUnaligned() << endl;
36         }
37         unalignedFastaFile.close();
38         
39         path = globaldata->argv;
40         path = path.substr(0, (path.find_last_of('m')));
41         
42         string formatdbCommand = path + "blast/bin/formatdb -p F -o T -i " + dbFileName;        //      format the database, -o option gives us the ability
43         system(formatdbCommand.c_str());                                                                //      to get the right sequence names, i think. -p F
44                                                                                                                                         //      option tells formatdb that seqs are DNA, not prot
45         cout << "DONE." << endl << endl;        cout.flush();
46         emptySequence = new Sequence();
47         emptySequence->setName("no_match");
48         emptySequence->setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
49         emptySequence->setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
50
51
52 }
53
54 /**************************************************************************************************/
55
56 BlastDB::~BlastDB(){
57         remove(queryFileName.c_str());                          //      let's clean stuff up and remove the temp files
58         remove(dbFileName.c_str());                                     //      let's clean stuff up and remove the temp files
59         remove(blastFileName.c_str());                          //      let's clean stuff up and remove the temp files
60 }
61
62 /**************************************************************************************************/
63
64 Sequence* BlastDB::findClosestSequence(Sequence* candidate){
65
66         ofstream queryFile;
67         openOutputFile(queryFileName, queryFile);
68         queryFile << '>' << candidate->getName() << endl;
69         queryFile << candidate->getUnaligned() << endl;
70         queryFile.close();
71         
72         
73 //      the goal here is to quickly survey the database to find the closest match.  To do this we are using the default
74 //      wordsize used in megablast.  I'm sure we're sacrificing accuracy for speed, but anyother way would take way too
75 //      long.  With this setting, it seems comparable in speed to the suffix tree approach.
76
77         string blastCommand = path + "blast/bin/blastall -p blastn -d " + dbFileName + " -b 1 -m 8 -W 28";
78         blastCommand += (" -i " + queryFileName + " -o " + blastFileName);
79         system(blastCommand.c_str());
80         
81         ifstream m8FileHandle;
82         openInputFile(blastFileName, m8FileHandle);
83         
84         string dummy;
85         int templateAccession;
86         gobble(m8FileHandle);
87         if(!m8FileHandle.eof()){
88                 m8FileHandle >> dummy >> templateAccession >> searchScore;
89                 m8FileHandle.close();
90                 return templateSequences[templateAccession];
91         }
92         else{
93                 searchScore = 0.00;
94                 return emptySequence;
95         }
96 }
97
98 /**************************************************************************************************/