]> git.donarmstrong.com Git - mothur.git/blob - blastalign.cpp
created mothurOut class to handle logfiles
[mothur.git] / blastalign.cpp
1 /*
2  *  blastalign.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/16/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  *      This is a basic alignment method that gets the blast program to do the heavy lifting.  In the future, we should
9  *      probably incorporate NCBI's library so that we don't have to call on a user-supplied executable.  This is a child
10  *      of the Alignment class, which requires a constructor and align method.
11  *
12  */
13
14
15 #include "alignment.hpp"
16 #include "blastalign.hpp"
17
18
19 //**************************************************************************************************/
20
21 BlastAlignment::BlastAlignment(float go, float ge, float m, float mm) : 
22                         match(m),                               //      This is the score to award for two nucleotides matching (match >= 0)
23                         mismatch(mm)                    //      This is the penalty to assess for a mismatch (mismatch <= 0)
24 {
25         globaldata = GlobalData::getInstance();
26         path = globaldata->argv;
27         path = path.substr(0, (path.find_last_of('m')));
28         
29         gapOpen = abs(go);                              //      This is the penalty to assess for opening a gap (gapOpen >= 0)
30         gapExtend = abs(ge);                            //      This is the penalty to assess for extending a gap (gapExtend >= 0)
31                 
32         int randNumber = rand();
33         candidateFileName = toString(randNumber) + ".candidate";
34         templateFileName = toString(randNumber) + ".template";
35         blastFileName = toString(randNumber) + ".pairwise";
36 }
37
38 //**************************************************************************************************/
39
40 BlastAlignment::~BlastAlignment(){              //      The desctructor should clean up by removing the temporary 
41         remove(candidateFileName.c_str());      //      files used to run bl2seq
42         remove(templateFileName.c_str());
43         remove(blastFileName.c_str());
44 }
45
46 //**************************************************************************************************/
47
48 void BlastAlignment::align(string seqA, string seqB){   //Use blastn to align the two sequences
49
50         ofstream candidateFile(candidateFileName.c_str());      //      Write the sequence to be aligned to a temporary candidate seq file
51         candidateFile << ">candidate" << endl << seqA << endl;
52         candidateFile.close();
53         
54         ofstream templateFile(templateFileName.c_str());        //      Write the unaligned template sequence to a temporary candidate seq file
55         templateFile << ">template" << endl << seqB << endl;
56         templateFile.close();
57         
58         //      The blastCommand assumes that we have DNA sequences (blastn) and that they are fairly similar (-e 0.001) and
59         //      that we don't want to apply any kind of complexity filtering (-F F)
60         string blastCommand = path + "blast/bin/bl2seq -p blastn -i " + candidateFileName + " -j " + templateFileName + " -e 0.0001 -F F -o " + blastFileName + " -W 11";
61         blastCommand += " -r " + toString(match) + " -q " + toString(mismatch);
62         blastCommand += " -G " + toString(gapOpen) + " -E " + toString(gapExtend);
63         
64         system(blastCommand.c_str());   //      Here we assume that "bl2seq" is in the users path or in the same folder as
65                                                                         //      this executable
66         setPairwiseSeqs();
67 }
68
69 /**************************************************************************************************/
70
71 void BlastAlignment::setPairwiseSeqs(){ //      This method call assigns the blast generated alignment
72                                                                                                                         //      to the pairwise entry in the Sequence class for the 
73                                                                                                                         //      candidate and template Sequence objects
74         ifstream blastFile;
75         openInputFile(blastFileName, blastFile);
76         
77         seqAaln = "";
78         seqBaln = "";
79         
80         int candidateLength, templateLength;
81         char d;
82         
83         string candidateName, templateName;
84         
85         while((d=blastFile.get()) != '='){}
86         blastFile >> candidateName;                                     //      Get the candidate sequence name from flatfile
87         
88         while((d=blastFile.get()) != '('){}
89         blastFile >> candidateLength;                           //      Get the candidate sequence length from flatfile
90         
91         while((d=blastFile.get())){
92                 if(d == '>'){
93                         blastFile >> templateName;                      //      Get the template sequence name from flatfile
94                         break;
95                 }
96                 else if(d == '*'){                                                                      //      We go here if there is no significant match
97                         
98                         seqAstart = 0;
99                         seqBstart = 0;
100                         seqAend = 0;
101                         seqBend = 0;
102                         pairwiseLength = 0;
103                         
104 //                      string dummy;
105 //                      while(dummy != "query:"){       m->mothurOut(dummy, ""); m->mothurOutEndLine(); blastFile >> dummy;     }
106 //                      blastFile >> seqBend;
107 //                      m->mothurOut(toString(seqBend), ""); m->mothurOutEndLine();
108 //                      for(int i=0;i<seqBend;i++){
109 //                              seqAaln += 'Z';
110 //                              seqBaln += 'X';
111 //                      }
112 //                      pairwiseLength = 0;
113                         return;
114                 }
115         }
116         
117         while((d=blastFile.get()) != '='){}
118         blastFile >> templateLength;                            //      Get the template sequence length from flatfile
119                 
120         while((d=blastFile.get()) != 'Q'){}                     //      Suck up everything else until we get to the start of the alignment
121         int queryStart, sbjctStart, queryEnd, sbjctEnd;
122         string queryLabel, sbjctLabel, query, sbjct;
123
124         blastFile >> queryLabel;        queryLabel = 'Q' + queryLabel;
125
126         
127         while(queryLabel == "Query:"){
128                 blastFile >> queryStart >> query >> queryEnd;
129                 
130                 while((d=blastFile.get()) != 'S'){};
131                 
132                 blastFile >> sbjctLabel >> sbjctStart >> sbjct >> sbjctEnd;
133                 
134                 if(seqAaln == ""){
135                         seqAstart = queryStart;
136                         seqBstart = sbjctStart;
137                 }
138
139                 seqAaln += query;                                       //      concatenate each line of the sequence to what we already have
140                 seqBaln += sbjct;                                       //      for the query and template (subject) sequence
141                 
142                 blastFile >> queryLabel;
143         }
144         seqAend = queryEnd;
145         seqBend = sbjctEnd;
146         pairwiseLength = seqAaln.length();
147
148         for(int i=1;i<seqBstart;i++){                           //      Since the alignments don't always start at (1, 1), we need to pad
149                 seqAaln = 'Z' + seqAaln;                                //      the sequences so that they start at the same point
150                 seqBaln = 'X' + seqBaln;
151         }
152         
153         for(int i=seqBend+1;i<=templateLength;i++){     //      since the sequences don't necessarily end at the same point, we
154                 seqAaln += 'Z';                                                 //      again need ot pad the sequences so that they extend to the length
155                 seqBaln += 'X';                                                 //      of the template sequence
156         }
157         blastFile.close();
158 }
159
160 //**************************************************************************************************/