]> git.donarmstrong.com Git - mothur.git/blob - blastalign.cpp
added alignment code
[mothur.git] / blastalign.cpp
1 /*
2  *  blastalign.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/16/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  *      This is a basic alignment method that gets the blast program to do the heavy lifting.  In the future, we should
9  *      probably incorporate NCBI's library so that we don't have to call on a user-supplied executable.  This is a child
10  *      of the Alignment class, which requires a constructor and align method.
11  *
12  */
13
14 using namespace std;
15
16 #include "alignment.hpp"
17 #include "blastalign.hpp"
18
19
20 //**************************************************************************************************/
21
22 BlastAlignment::BlastAlignment(float go, float ge, float m, float mm) : 
23                         match(m),                               //      This is the score to award for two nucleotides matching (match >= 0)
24                         mismatch(mm)                    //      This is the penalty to assess for a mismatch (mismatch <= 0)
25 {
26         gapOpen = abs(go);                              //      This is the penalty to assess for opening a gap (gapOpen >= 0)
27         gapExtend = abs(ge);                            //      This is the penalty to assess for extending a gap (gapExtend >= 0)
28                 
29         int randNumber = rand();
30         candidateFileName = toString(randNumber) + ".candidate";
31         templateFileName = toString(randNumber) + ".template";
32         blastFileName = toString(randNumber) + ".pairwise";
33 }
34
35 //**************************************************************************************************/
36
37 BlastAlignment::~BlastAlignment(){              //      The desctructor should clean up by removing the temporary 
38         remove(candidateFileName.c_str());      //      files used to run bl2seq
39         remove(templateFileName.c_str());
40         remove(blastFileName.c_str());
41 }
42
43 //**************************************************************************************************/
44
45 void BlastAlignment::align(string seqA, string seqB){   //Use blastn to align the two sequences
46
47         ofstream candidateFile(candidateFileName.c_str());      //      Write the sequence to be aligned to a temporary candidate seq file
48         candidateFile << ">candidate" << endl << seqA << endl;
49         candidateFile.close();
50         
51         ofstream templateFile(templateFileName.c_str());        //      Write the unaligned template sequence to a temporary candidate seq file
52         templateFile << ">template" << endl << seqB << endl;
53         templateFile.close();
54         
55         //      The blastCommand assumes that we have DNA sequences (blastn) and that they are fairly similar (-e 0.001) and
56         //      that we don't want to apply any kind of complexity filtering (-F F)
57         string blastCommand = "~/Pipeline/src/cpp/production/blast/bin/bl2seq -p blastn -i " + candidateFileName + " -j " + templateFileName + " -e 0.0001 -F F -o " + blastFileName + " -W 11";
58         blastCommand += " -r " + toString(match) + " -q " + toString(mismatch);
59         blastCommand += " -G " + toString(gapOpen) + " -E " + toString(gapExtend);
60         
61         system(blastCommand.c_str());   //      Here we assume that "bl2seq" is in the users path or in the same folder as
62                                                                         //      this executable
63         setPairwiseSeqs();
64 }
65
66 /**************************************************************************************************/
67
68 void BlastAlignment::setPairwiseSeqs(){ //      This method call assigns the blast generated alignment
69                                                                                                                         //      to the pairwise entry in the Sequence class for the 
70                                                                                                                         //      candidate and template Sequence objects
71         ifstream blastFile;
72         openInputFile(blastFileName, blastFile);
73         
74         seqAaln = "";
75         seqBaln = "";
76         
77         int candidateLength, templateLength;
78         char d;
79         
80         string candidateName, templateName;
81         
82         while(d=blastFile.get() != '='){};
83         blastFile >> candidateName;                                     //      Get the candidate sequence name from flatfile
84         
85         while(d=blastFile.get() != '('){};
86         blastFile >> candidateLength;                           //      Get the candidate sequence length from flatfile
87         
88         while(d=blastFile.get()){
89                 if(d == '>'){
90                         blastFile >> templateName;                      //      Get the template sequence name from flatfile
91                         break;
92                 }
93                 else if(d == '*'){                                                                      //      We go here if there is no significant match
94                         
95                         seqAstart = 0;
96                         seqBstart = 0;
97                         seqAend = 0;
98                         seqBend = 0;
99                         pairwiseLength = 0;
100                         
101 //                      string dummy;
102 //                      while(dummy != "query:"){       cout << dummy << endl;blastFile >> dummy;       }
103 //                      blastFile >> seqBend;
104 //                      cout << seqBend << endl;
105 //                      for(int i=0;i<seqBend;i++){
106 //                              seqAaln += 'Z';
107 //                              seqBaln += 'X';
108 //                      }
109 //                      pairwiseLength = 0;
110                         return;
111                 }
112         }
113         
114         while(d=blastFile.get() != '='){};
115         blastFile >> templateLength;                            //      Get the template sequence length from flatfile
116                 
117         while(d=blastFile.get() != 'Q'){};                      //      Suck up everything else until we get to the start of the alignment
118         int queryStart, sbjctStart, queryEnd, sbjctEnd;
119         string queryLabel, sbjctLabel, query, sbjct;
120
121         blastFile >> queryLabel;        queryLabel = 'Q' + queryLabel;
122
123         
124         while(queryLabel == "Query:"){
125                 blastFile >> queryStart >> query >> queryEnd;
126                 
127                 while(d=blastFile.get() != 'S'){};
128                 
129                 blastFile >> sbjctLabel >> sbjctStart >> sbjct >> sbjctEnd;
130                 
131                 if(seqAaln == ""){
132                         seqAstart = queryStart;
133                         seqBstart = sbjctStart;
134                 }
135
136                 seqAaln += query;                                       //      concatenate each line of the sequence to what we already have
137                 seqBaln += sbjct;                                       //      for the query and template (subject) sequence
138                 
139                 blastFile >> queryLabel;
140         }
141         seqAend = queryEnd;
142         seqBend = sbjctEnd;
143         pairwiseLength = seqAaln.length();
144
145         for(int i=1;i<seqBstart;i++){                           //      Since the alignments don't always start at (1, 1), we need to pad
146                 seqAaln = 'Z' + seqAaln;                                //      the sequences so that they start at the same point
147                 seqBaln = 'X' + seqBaln;
148         }
149         
150         for(int i=seqBend+1;i<=templateLength;i++){     //      since the sequences don't necessarily end at the same point, we
151                 seqAaln += 'Z';                                                 //      again need ot pad the sequences so that they extend to the length
152                 seqBaln += 'X';                                                 //      of the template sequence
153         }
154 }
155
156 //**************************************************************************************************/