]> git.donarmstrong.com Git - mothur.git/blob - blastalign.cpp
changes while testing
[mothur.git] / blastalign.cpp
1 /*
2  *  blastalign.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/16/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  *      This is a basic alignment method that gets the blast program to do the heavy lifting.  In the future, we should
9  *      probably incorporate NCBI's library so that we don't have to call on a user-supplied executable.  This is a child
10  *      of the Alignment class, which requires a constructor and align method.
11  *
12  */
13
14
15 #include "alignment.hpp"
16 #include "blastalign.hpp"
17
18
19 //**************************************************************************************************/
20
21 BlastAlignment::BlastAlignment(float go, float ge, float ma, float mm) : 
22                         match(ma),                              //      This is the score to award for two nucleotides matching (match >= 0)
23                         mismatch(mm)                    //      This is the penalty to assess for a mismatch (mismatch <= 0)
24 {
25         path = m->argv;
26         path = path.substr(0, (path.find_last_of('m')));
27         
28         gapOpen = abs(go);                              //      This is the penalty to assess for opening a gap (gapOpen >= 0)
29         gapExtend = abs(ge);                            //      This is the penalty to assess for extending a gap (gapExtend >= 0)
30                 
31         int randNumber = rand();
32         candidateFileName = toString(randNumber) + ".candidate";
33         templateFileName = toString(randNumber) + ".template";
34         blastFileName = toString(randNumber) + ".pairwise";
35 }
36
37 //**************************************************************************************************/
38
39 BlastAlignment::~BlastAlignment(){              //      The desctructor should clean up by removing the temporary 
40         m->mothurRemove(candidateFileName);     //      files used to run bl2seq
41         m->mothurRemove(templateFileName);
42         m->mothurRemove(blastFileName);
43 }
44
45 //**************************************************************************************************/
46
47 void BlastAlignment::align(string seqA, string seqB){   //Use blastn to align the two sequences
48
49         ofstream candidateFile(candidateFileName.c_str());      //      Write the sequence to be aligned to a temporary candidate seq file
50         candidateFile << ">candidate" << endl << seqA << endl;
51         candidateFile.close();
52         
53         ofstream templateFile(templateFileName.c_str());        //      Write the unaligned template sequence to a temporary candidate seq file
54         templateFile << ">template" << endl << seqB << endl;
55         templateFile.close();
56         
57         //      The blastCommand assumes that we have DNA sequences (blastn) and that they are fairly similar (-e 0.001) and
58         //      that we don't want to apply any kind of complexity filtering (-F F)
59         string blastCommand = path + "blast/bin/bl2seq -p blastn -i " + candidateFileName + " -j " + templateFileName + " -e 0.0001 -F F -o " + blastFileName + " -W 11";
60         blastCommand += " -r " + toString(match) + " -q " + toString(mismatch);
61         blastCommand += " -G " + toString(gapOpen) + " -E " + toString(gapExtend);
62         
63         system(blastCommand.c_str());   //      Here we assume that "bl2seq" is in the users path or in the same folder as
64                                                                         //      this executable
65         setPairwiseSeqs();
66 }
67
68 /**************************************************************************************************/
69
70 void BlastAlignment::setPairwiseSeqs(){ //      This method call assigns the blast generated alignment
71                                                                                                                         //      to the pairwise entry in the Sequence class for the 
72                                                                                                                         //      candidate and template Sequence objects
73         ifstream blastFile;
74         m->openInputFile(blastFileName, blastFile);
75         
76         seqAaln = "";
77         seqBaln = "";
78         
79         int candidateLength, templateLength;
80         char d;
81         
82         string candidateName, templateName;
83         
84         while((d=blastFile.get()) != '='){}
85         blastFile >> candidateName;                                     //      Get the candidate sequence name from flatfile
86         
87         while((d=blastFile.get()) != '('){}
88         blastFile >> candidateLength;                           //      Get the candidate sequence length from flatfile
89         
90         while((d=blastFile.get())){
91                 if(d == '>'){
92                         blastFile >> templateName;                      //      Get the template sequence name from flatfile
93                         break;
94                 }
95                 else if(d == '*'){                                                                      //      We go here if there is no significant match
96                         
97                         seqAstart = 0;
98                         seqBstart = 0;
99                         seqAend = 0;
100                         seqBend = 0;
101                         pairwiseLength = 0;
102                         
103 //                      string dummy;
104 //                      while(dummy != "query:"){       m->mothurOut(dummy, ""); m->mothurOutEndLine(); blastFile >> dummy;     }
105 //                      blastFile >> seqBend;
106 //                      m->mothurOut(toString(seqBend), ""); m->mothurOutEndLine();
107 //                      for(int i=0;i<seqBend;i++){
108 //                              seqAaln += 'Z';
109 //                              seqBaln += 'X';
110 //                      }
111 //                      pairwiseLength = 0;
112                         return;
113                 }
114         }
115         
116         while((d=blastFile.get()) != '='){}
117         blastFile >> templateLength;                            //      Get the template sequence length from flatfile
118                 
119         while((d=blastFile.get()) != 'Q'){}                     //      Suck up everything else until we get to the start of the alignment
120         int queryStart, sbjctStart, queryEnd, sbjctEnd;
121         string queryLabel, sbjctLabel, query, sbjct;
122
123         blastFile >> queryLabel;        queryLabel = 'Q' + queryLabel;
124
125         
126         while(queryLabel == "Query:"){
127                 blastFile >> queryStart >> query >> queryEnd;
128                 
129                 while((d=blastFile.get()) != 'S'){};
130                 
131                 blastFile >> sbjctLabel >> sbjctStart >> sbjct >> sbjctEnd;
132                 
133                 if(seqAaln == ""){
134                         seqAstart = queryStart;
135                         seqBstart = sbjctStart;
136                 }
137
138                 seqAaln += query;                                       //      concatenate each line of the sequence to what we already have
139                 seqBaln += sbjct;                                       //      for the query and template (subject) sequence
140                 
141                 blastFile >> queryLabel;
142         }
143         seqAend = queryEnd;
144         seqBend = sbjctEnd;
145         pairwiseLength = seqAaln.length();
146
147         for(int i=1;i<seqBstart;i++){                           //      Since the alignments don't always start at (1, 1), we need to pad
148                 seqAaln = 'Z' + seqAaln;                                //      the sequences so that they start at the same point
149                 seqBaln = 'X' + seqBaln;
150         }
151         
152         for(int i=seqBend+1;i<=templateLength;i++){     //      since the sequences don't necessarily end at the same point, we
153                 seqAaln += 'Z';                                                 //      again need ot pad the sequences so that they extend to the length
154                 seqBaln += 'X';                                                 //      of the template sequence
155         }
156         blastFile.close();
157 }
158
159 //**************************************************************************************************/