]> git.donarmstrong.com Git - mothur.git/blob - blastalign.cpp
fixed blast code path problem
[mothur.git] / blastalign.cpp
1 /*
2  *  blastalign.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/16/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  *      This is a basic alignment method that gets the blast program to do the heavy lifting.  In the future, we should
9  *      probably incorporate NCBI's library so that we don't have to call on a user-supplied executable.  This is a child
10  *      of the Alignment class, which requires a constructor and align method.
11  *
12  */
13
14 using namespace std;
15
16 #include "alignment.hpp"
17 #include "blastalign.hpp"
18
19
20 //**************************************************************************************************/
21
22 BlastAlignment::BlastAlignment(float go, float ge, float m, float mm) : 
23                         match(m),                               //      This is the score to award for two nucleotides matching (match >= 0)
24                         mismatch(mm)                    //      This is the penalty to assess for a mismatch (mismatch <= 0)
25 {
26         globaldata = GlobalData::getInstance();
27         path = globaldata->argv;
28         path = path.substr(0, (path.find_last_of('m')));
29         
30         gapOpen = abs(go);                              //      This is the penalty to assess for opening a gap (gapOpen >= 0)
31         gapExtend = abs(ge);                            //      This is the penalty to assess for extending a gap (gapExtend >= 0)
32                 
33         int randNumber = rand();
34         candidateFileName = toString(randNumber) + ".candidate";
35         templateFileName = toString(randNumber) + ".template";
36         blastFileName = toString(randNumber) + ".pairwise";
37 }
38
39 //**************************************************************************************************/
40
41 BlastAlignment::~BlastAlignment(){              //      The desctructor should clean up by removing the temporary 
42         remove(candidateFileName.c_str());      //      files used to run bl2seq
43         remove(templateFileName.c_str());
44         remove(blastFileName.c_str());
45 }
46
47 //**************************************************************************************************/
48
49 void BlastAlignment::align(string seqA, string seqB){   //Use blastn to align the two sequences
50
51         ofstream candidateFile(candidateFileName.c_str());      //      Write the sequence to be aligned to a temporary candidate seq file
52         candidateFile << ">candidate" << endl << seqA << endl;
53         candidateFile.close();
54         
55         ofstream templateFile(templateFileName.c_str());        //      Write the unaligned template sequence to a temporary candidate seq file
56         templateFile << ">template" << endl << seqB << endl;
57         templateFile.close();
58         
59         //      The blastCommand assumes that we have DNA sequences (blastn) and that they are fairly similar (-e 0.001) and
60         //      that we don't want to apply any kind of complexity filtering (-F F)
61         string blastCommand = path + "blast/bin/bl2seq -p blastn -i " + candidateFileName + " -j " + templateFileName + " -e 0.0001 -F F -o " + blastFileName + " -W 11";
62         blastCommand += " -r " + toString(match) + " -q " + toString(mismatch);
63         blastCommand += " -G " + toString(gapOpen) + " -E " + toString(gapExtend);
64         
65         system(blastCommand.c_str());   //      Here we assume that "bl2seq" is in the users path or in the same folder as
66                                                                         //      this executable
67         setPairwiseSeqs();
68 }
69
70 /**************************************************************************************************/
71
72 void BlastAlignment::setPairwiseSeqs(){ //      This method call assigns the blast generated alignment
73                                                                                                                         //      to the pairwise entry in the Sequence class for the 
74                                                                                                                         //      candidate and template Sequence objects
75         ifstream blastFile;
76         openInputFile(blastFileName, blastFile);
77         
78         seqAaln = "";
79         seqBaln = "";
80         
81         int candidateLength, templateLength;
82         char d;
83         
84         string candidateName, templateName;
85         
86         while(d=blastFile.get() != '='){};
87         blastFile >> candidateName;                                     //      Get the candidate sequence name from flatfile
88         
89         while(d=blastFile.get() != '('){};
90         blastFile >> candidateLength;                           //      Get the candidate sequence length from flatfile
91         
92         while(d=blastFile.get()){
93                 if(d == '>'){
94                         blastFile >> templateName;                      //      Get the template sequence name from flatfile
95                         break;
96                 }
97                 else if(d == '*'){                                                                      //      We go here if there is no significant match
98                         
99                         seqAstart = 0;
100                         seqBstart = 0;
101                         seqAend = 0;
102                         seqBend = 0;
103                         pairwiseLength = 0;
104                         
105 //                      string dummy;
106 //                      while(dummy != "query:"){       cout << dummy << endl;blastFile >> dummy;       }
107 //                      blastFile >> seqBend;
108 //                      cout << seqBend << endl;
109 //                      for(int i=0;i<seqBend;i++){
110 //                              seqAaln += 'Z';
111 //                              seqBaln += 'X';
112 //                      }
113 //                      pairwiseLength = 0;
114                         return;
115                 }
116         }
117         
118         while(d=blastFile.get() != '='){};
119         blastFile >> templateLength;                            //      Get the template sequence length from flatfile
120                 
121         while(d=blastFile.get() != 'Q'){};                      //      Suck up everything else until we get to the start of the alignment
122         int queryStart, sbjctStart, queryEnd, sbjctEnd;
123         string queryLabel, sbjctLabel, query, sbjct;
124
125         blastFile >> queryLabel;        queryLabel = 'Q' + queryLabel;
126
127         
128         while(queryLabel == "Query:"){
129                 blastFile >> queryStart >> query >> queryEnd;
130                 
131                 while(d=blastFile.get() != 'S'){};
132                 
133                 blastFile >> sbjctLabel >> sbjctStart >> sbjct >> sbjctEnd;
134                 
135                 if(seqAaln == ""){
136                         seqAstart = queryStart;
137                         seqBstart = sbjctStart;
138                 }
139
140                 seqAaln += query;                                       //      concatenate each line of the sequence to what we already have
141                 seqBaln += sbjct;                                       //      for the query and template (subject) sequence
142                 
143                 blastFile >> queryLabel;
144         }
145         seqAend = queryEnd;
146         seqBend = sbjctEnd;
147         pairwiseLength = seqAaln.length();
148
149         for(int i=1;i<seqBstart;i++){                           //      Since the alignments don't always start at (1, 1), we need to pad
150                 seqAaln = 'Z' + seqAaln;                                //      the sequences so that they start at the same point
151                 seqBaln = 'X' + seqBaln;
152         }
153         
154         for(int i=seqBend+1;i<=templateLength;i++){     //      since the sequences don't necessarily end at the same point, we
155                 seqAaln += 'Z';                                                 //      again need ot pad the sequences so that they extend to the length
156                 seqBaln += 'X';                                                 //      of the template sequence
157         }
158 }
159
160 //**************************************************************************************************/