]> git.donarmstrong.com Git - mothur.git/blob - blastdb.cpp
82b104e0380b2f8794ebf9e0ce2693d835dd8cc0
[mothur.git] / blastdb.cpp
1 /*
2  *  blastdb.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/22/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 using namespace std;
11
12
13 #include "database.hpp"
14 #include "sequence.hpp"
15 #include "blastdb.hpp"
16
17 /**************************************************************************************************/
18
19 BlastDB::BlastDB(string fastaFileName, float gO, float gE, float m, float mM) : Database(fastaFileName), 
20 gapOpen(gO), gapExtend(gE), match(m), misMatch(mM) {
21         
22         globaldata = GlobalData::getInstance();
23         
24         cout << "Generating the temporary BLAST database...\t"; cout.flush();
25
26         int randNumber = rand();
27         dbFileName = toString(randNumber) + ".template.unaligned.fasta";
28         queryFileName = toString(randNumber) + ".candidate.unaligned.fasta";
29         blastFileName = toString(randNumber) + ".blast";
30
31
32         ofstream unalignedFastaFile;
33         openOutputFile(dbFileName, unalignedFastaFile);                         
34         
35         for(int i=0;i<numSeqs;i++){                                                                     //      generating a fasta file with unaligned template
36                 unalignedFastaFile << '>' << i << endl;                                 //      sequences, which will be input to formatdb
37                 unalignedFastaFile << templateSequences[i]->getUnaligned() << endl;
38         }
39         unalignedFastaFile.close();
40         
41         path = globaldata->argv;
42         path = path.substr(0, (path.find_last_of('m')));
43         
44         string formatdbCommand = path + "blast/bin/formatdb -p F -o T -i " + dbFileName;        //      format the database, -o option gives us the ability
45         system(formatdbCommand.c_str());                                                                //      to get the right sequence names, i think. -p F
46                                                                                                                                         //      option tells formatdb that seqs are DNA, not prot
47         cout << "DONE." << endl << endl;        cout.flush();
48         emptySequence = new Sequence();
49         emptySequence->setName("no_match");
50         emptySequence->setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
51         emptySequence->setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
52
53
54 }
55
56 /**************************************************************************************************/
57
58 BlastDB::~BlastDB(){
59         remove(queryFileName.c_str());                          //      let's clean stuff up and remove the temp files
60         remove(dbFileName.c_str());                                     //      let's clean stuff up and remove the temp files
61         remove(blastFileName.c_str());                          //      let's clean stuff up and remove the temp files
62 }
63
64 /**************************************************************************************************/
65
66 Sequence* BlastDB::findClosestSequence(Sequence* candidate){
67
68         ofstream queryFile;
69         openOutputFile(queryFileName, queryFile);
70         queryFile << '>' << candidate->getName() << endl;
71         queryFile << candidate->getUnaligned() << endl;
72         queryFile.close();
73         
74         
75 //      the goal here is to quickly survey the database to find the closest match.  To do this we are using the default
76 //      wordsize used in megablast.  I'm sure we're sacrificing accuracy for speed, but anyother way would take way too
77 //      long.  With this setting, it seems comparable in speed to the suffix tree approach.
78
79         string blastCommand = path + "blast/bin/blastall -p blastn -d " + dbFileName + " -b 1 -m 8 -W 28";
80         blastCommand += (" -i " + queryFileName + " -o " + blastFileName);
81         system(blastCommand.c_str());
82         
83         ifstream m8FileHandle;
84         openInputFile(blastFileName, m8FileHandle);
85         
86         string dummy;
87         int templateAccession;
88         gobble(m8FileHandle);
89         if(!m8FileHandle.eof()){
90                 m8FileHandle >> dummy >> templateAccession >> searchScore;
91                 m8FileHandle.close();
92                 return templateSequences[templateAccession];
93         }
94         else{
95                 searchScore = 0.00;
96                 return emptySequence;
97         }
98 }
99
100 /**************************************************************************************************/