5 * Created by Pat Schloss on 12/22/08.
6 * Copyright 2008 Patrick D. Schloss. All rights reserved.
11 #include "database.hpp"
12 #include "sequence.hpp"
13 #include "blastdb.hpp"
15 /**************************************************************************************************/
17 BlastDB::BlastDB(string fastaFileName, float gO, float gE, float m, float mM) : Database(fastaFileName),
18 gapOpen(gO), gapExtend(gE), match(m), misMatch(mM) {
20 globaldata = GlobalData::getInstance();
22 mothurOut("Generating the temporary BLAST database...\t"); cout.flush();
24 int randNumber = rand();
25 dbFileName = toString(randNumber) + ".template.unaligned.fasta";
26 queryFileName = toString(randNumber) + ".candidate.unaligned.fasta";
27 blastFileName = toString(randNumber) + ".blast";
30 ofstream unalignedFastaFile;
31 openOutputFile(dbFileName, unalignedFastaFile);
33 for(int i=0;i<numSeqs;i++){ // generating a fasta file with unaligned template
34 unalignedFastaFile << '>' << i << endl; // sequences, which will be input to formatdb
35 unalignedFastaFile << templateSequences[i].getUnaligned() << endl;
37 unalignedFastaFile.close();
39 path = globaldata->argv;
40 path = path.substr(0, (path.find_last_of('m')));
42 string formatdbCommand = path + "blast/bin/formatdb -p F -o T -i " + dbFileName; // format the database, -o option gives us the ability
43 system(formatdbCommand.c_str()); // to get the right sequence names, i think. -p F
44 // option tells formatdb that seqs are DNA, not prot
45 mothurOut("DONE."); mothurOutEndLine(); mothurOutEndLine(); cout.flush();
46 emptySequence = Sequence();
47 emptySequence.setName("no_match");
48 emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
49 emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
54 /**************************************************************************************************/
57 //for (int i = 0; i < templateSequences.size(); i++) { delete templateSequences[i]; }
58 //templateSequences.clear();
59 //delete emptySequence;
61 remove(queryFileName.c_str()); // let's clean stuff up and remove the temp files
62 remove(dbFileName.c_str()); // let's clean stuff up and remove the temp files
63 remove(blastFileName.c_str()); // let's clean stuff up and remove the temp files
66 /**************************************************************************************************/
68 Sequence BlastDB::findClosestSequence(Sequence* candidate){
71 openOutputFile(queryFileName, queryFile);
72 queryFile << '>' << candidate->getName() << endl;
73 queryFile << candidate->getUnaligned() << endl;
77 // the goal here is to quickly survey the database to find the closest match. To do this we are using the default
78 // wordsize used in megablast. I'm sure we're sacrificing accuracy for speed, but anyother way would take way too
79 // long. With this setting, it seems comparable in speed to the suffix tree approach.
81 string blastCommand = path + "blast/bin/blastall -p blastn -d " + dbFileName + " -b 1 -m 8 -W 28";
82 blastCommand += (" -i " + queryFileName + " -o " + blastFileName);
83 system(blastCommand.c_str());
85 ifstream m8FileHandle;
86 openInputFile(blastFileName, m8FileHandle);
89 int templateAccession;
91 if(!m8FileHandle.eof()){
92 m8FileHandle >> dummy >> templateAccession >> searchScore;
94 return templateSequences[templateAccession];
100 m8FileHandle.close();
103 /**************************************************************************************************/