]> git.donarmstrong.com Git - mothur.git/blob - blastdb.cpp
added logfile feature
[mothur.git] / blastdb.cpp
1 /*
2  *  blastdb.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/22/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10
11 #include "database.hpp"
12 #include "sequence.hpp"
13 #include "blastdb.hpp"
14
15 /**************************************************************************************************/
16
17 BlastDB::BlastDB(string fastaFileName, float gO, float gE, float m, float mM) : Database(fastaFileName), 
18 gapOpen(gO), gapExtend(gE), match(m), misMatch(mM) {
19         
20         globaldata = GlobalData::getInstance();
21         
22         mothurOut("Generating the temporary BLAST database...\t");      cout.flush();
23
24         int randNumber = rand();
25         dbFileName = toString(randNumber) + ".template.unaligned.fasta";
26         queryFileName = toString(randNumber) + ".candidate.unaligned.fasta";
27         blastFileName = toString(randNumber) + ".blast";
28
29
30         ofstream unalignedFastaFile;
31         openOutputFile(dbFileName, unalignedFastaFile);                         
32         
33         for(int i=0;i<numSeqs;i++){                                                                     //      generating a fasta file with unaligned template
34                 unalignedFastaFile << '>' << i << endl;                                 //      sequences, which will be input to formatdb
35                 unalignedFastaFile << templateSequences[i].getUnaligned() << endl;
36         }
37         unalignedFastaFile.close();
38         
39         path = globaldata->argv;
40         path = path.substr(0, (path.find_last_of('m')));
41         
42         string formatdbCommand = path + "blast/bin/formatdb -p F -o T -i " + dbFileName;        //      format the database, -o option gives us the ability
43         system(formatdbCommand.c_str());                                                                //      to get the right sequence names, i think. -p F
44                                                                                                                                         //      option tells formatdb that seqs are DNA, not prot
45         mothurOut("DONE."); mothurOutEndLine(); mothurOutEndLine(); cout.flush();
46         emptySequence = Sequence();
47         emptySequence.setName("no_match");
48         emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
49         emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
50
51
52 }
53
54 /**************************************************************************************************/
55
56 BlastDB::~BlastDB(){
57         //for (int i = 0; i < templateSequences.size(); i++) {  delete templateSequences[i];   }
58         //templateSequences.clear();
59         //delete emptySequence;
60         
61         remove(queryFileName.c_str());                          //      let's clean stuff up and remove the temp files
62         remove(dbFileName.c_str());                                     //      let's clean stuff up and remove the temp files
63         remove(blastFileName.c_str());                          //      let's clean stuff up and remove the temp files
64 }
65
66 /**************************************************************************************************/
67
68 Sequence BlastDB::findClosestSequence(Sequence* candidate){
69
70         ofstream queryFile;
71         openOutputFile(queryFileName, queryFile);
72         queryFile << '>' << candidate->getName() << endl;
73         queryFile << candidate->getUnaligned() << endl;
74         queryFile.close();
75         
76         
77 //      the goal here is to quickly survey the database to find the closest match.  To do this we are using the default
78 //      wordsize used in megablast.  I'm sure we're sacrificing accuracy for speed, but anyother way would take way too
79 //      long.  With this setting, it seems comparable in speed to the suffix tree approach.
80
81         string blastCommand = path + "blast/bin/blastall -p blastn -d " + dbFileName + " -b 1 -m 8 -W 28";
82         blastCommand += (" -i " + queryFileName + " -o " + blastFileName);
83         system(blastCommand.c_str());
84         
85         ifstream m8FileHandle;
86         openInputFile(blastFileName, m8FileHandle);
87         
88         string dummy;
89         int templateAccession;
90         gobble(m8FileHandle);
91         if(!m8FileHandle.eof()){
92                 m8FileHandle >> dummy >> templateAccession >> searchScore;
93                 m8FileHandle.close();
94                 return templateSequences[templateAccession];
95         }
96         else{
97                 searchScore = 0.00;
98                 return emptySequence;
99         }
100         m8FileHandle.close();
101 }
102
103 /**************************************************************************************************/