]> git.donarmstrong.com Git - mothur.git/blob - blastdb.cpp
added alignment code
[mothur.git] / blastdb.cpp
1 /*
2  *  blastdb.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/22/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 using namespace std;
11
12
13 #include "database.hpp"
14 #include "sequence.hpp"
15 #include "blastdb.hpp"
16
17 /**************************************************************************************************/
18
19 BlastDB::BlastDB(string fastaFileName, float gO, float gE, float m, float mM) : Database(fastaFileName), 
20 gapOpen(gO), gapExtend(gE), match(m), misMatch(mM) {
21
22         cout << "Generating the temporary BLAST database...\t"; cout.flush();
23
24         int randNumber = rand();
25         dbFileName = toString(randNumber) + ".template.unaligned.fasta";
26         queryFileName = toString(randNumber) + ".candidate.unaligned.fasta";
27         blastFileName = toString(randNumber) + ".blast";
28
29
30         ofstream unalignedFastaFile;
31         openOutputFile(dbFileName, unalignedFastaFile);                         
32         
33         for(int i=0;i<numSeqs;i++){                                                                     //      generating a fasta file with unaligned template
34                 unalignedFastaFile << '>' << i << endl;                                 //      sequences, which will be input to formatdb
35                 unalignedFastaFile << templateSequences[i]->getUnaligned() << endl;
36         }
37         unalignedFastaFile.close();
38         
39         string formatdbCommand = "~/Pipeline/src/cpp/production/blast/bin/formatdb -p F -o T -i " + dbFileName; //      format the database, -o option gives us the ability
40         system(formatdbCommand.c_str());                                                                //      to get the right sequence names, i think. -p F
41                                                                                                                                         //      option tells formatdb that seqs are DNA, not prot
42         cout << "DONE." << endl << endl;        cout.flush();
43         emptySequence = new Sequence();
44         emptySequence->setName("no_match");
45         emptySequence->setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
46         emptySequence->setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
47
48
49 }
50
51 /**************************************************************************************************/
52
53 BlastDB::~BlastDB(){
54         remove(queryFileName.c_str());                          //      let's clean stuff up and remove the temp files
55         remove(dbFileName.c_str());                                     //      let's clean stuff up and remove the temp files
56         remove(blastFileName.c_str());                          //      let's clean stuff up and remove the temp files
57 }
58
59 /**************************************************************************************************/
60
61 Sequence* BlastDB::findClosestSequence(Sequence* candidate){
62
63         ofstream queryFile;
64         openOutputFile(queryFileName, queryFile);
65         queryFile << '>' << candidate->getName() << endl;
66         queryFile << candidate->getUnaligned() << endl;
67         queryFile.close();
68         
69         
70 //      the goal here is to quickly survey the database to find the closest match.  To do this we are using the default
71 //      wordsize used in megablast.  I'm sure we're sacrificing accuracy for speed, but anyother way would take way too
72 //      long.  With this setting, it seems comparable in speed to the suffix tree approach.
73
74         string blastCommand = "~/Pipeline/src/cpp/production/blast/bin/blastall -p blastn -d " + dbFileName + " -b 1 -m 8 -W 28";
75         blastCommand += (" -i " + queryFileName + " -o " + blastFileName);
76         system(blastCommand.c_str());
77         
78         ifstream m8FileHandle;
79         openInputFile(blastFileName, m8FileHandle);
80         
81         string dummy;
82         int templateAccession;
83         gobble(m8FileHandle);
84         if(!m8FileHandle.eof()){
85                 m8FileHandle >> dummy >> templateAccession >> searchScore;
86                 m8FileHandle.close();
87                 return templateSequences[templateAccession];
88         }
89         else{
90                 searchScore = 0.00;
91                 return emptySequence;
92         }
93 }
94
95 /**************************************************************************************************/