5 * Created by Pat Schloss on 12/16/08.
6 * Copyright 2008 Patrick D. Schloss. All rights reserved.
8 * This is a basic alignment method that gets the blast program to do the heavy lifting. In the future, we should
9 * probably incorporate NCBI's library so that we don't have to call on a user-supplied executable. This is a child
10 * of the Alignment class, which requires a constructor and align method.
16 #include "alignment.hpp"
17 #include "blastalign.hpp"
20 //**************************************************************************************************/
22 BlastAlignment::BlastAlignment(float go, float ge, float m, float mm) :
23 match(m), // This is the score to award for two nucleotides matching (match >= 0)
24 mismatch(mm) // This is the penalty to assess for a mismatch (mismatch <= 0)
26 globaldata = GlobalData::getInstance();
27 path = globaldata->argv;
28 path = path.substr(0, (path.find_last_of('m')));
30 gapOpen = abs(go); // This is the penalty to assess for opening a gap (gapOpen >= 0)
31 gapExtend = abs(ge); // This is the penalty to assess for extending a gap (gapExtend >= 0)
33 int randNumber = rand();
34 candidateFileName = toString(randNumber) + ".candidate";
35 templateFileName = toString(randNumber) + ".template";
36 blastFileName = toString(randNumber) + ".pairwise";
39 //**************************************************************************************************/
41 BlastAlignment::~BlastAlignment(){ // The desctructor should clean up by removing the temporary
42 remove(candidateFileName.c_str()); // files used to run bl2seq
43 remove(templateFileName.c_str());
44 remove(blastFileName.c_str());
47 //**************************************************************************************************/
49 void BlastAlignment::align(string seqA, string seqB){ //Use blastn to align the two sequences
51 ofstream candidateFile(candidateFileName.c_str()); // Write the sequence to be aligned to a temporary candidate seq file
52 candidateFile << ">candidate" << endl << seqA << endl;
53 candidateFile.close();
55 ofstream templateFile(templateFileName.c_str()); // Write the unaligned template sequence to a temporary candidate seq file
56 templateFile << ">template" << endl << seqB << endl;
59 // The blastCommand assumes that we have DNA sequences (blastn) and that they are fairly similar (-e 0.001) and
60 // that we don't want to apply any kind of complexity filtering (-F F)
61 string blastCommand = path + "blast/bin/bl2seq -p blastn -i " + candidateFileName + " -j " + templateFileName + " -e 0.0001 -F F -o " + blastFileName + " -W 11";
62 blastCommand += " -r " + toString(match) + " -q " + toString(mismatch);
63 blastCommand += " -G " + toString(gapOpen) + " -E " + toString(gapExtend);
65 system(blastCommand.c_str()); // Here we assume that "bl2seq" is in the users path or in the same folder as
70 /**************************************************************************************************/
72 void BlastAlignment::setPairwiseSeqs(){ // This method call assigns the blast generated alignment
73 // to the pairwise entry in the Sequence class for the
74 // candidate and template Sequence objects
76 openInputFile(blastFileName, blastFile);
81 int candidateLength, templateLength;
84 string candidateName, templateName;
86 while(d=blastFile.get() != '='){};
87 blastFile >> candidateName; // Get the candidate sequence name from flatfile
89 while(d=blastFile.get() != '('){};
90 blastFile >> candidateLength; // Get the candidate sequence length from flatfile
92 while(d=blastFile.get()){
94 blastFile >> templateName; // Get the template sequence name from flatfile
97 else if(d == '*'){ // We go here if there is no significant match
106 // while(dummy != "query:"){ cout << dummy << endl;blastFile >> dummy; }
107 // blastFile >> seqBend;
108 // cout << seqBend << endl;
109 // for(int i=0;i<seqBend;i++){
113 // pairwiseLength = 0;
118 while(d=blastFile.get() != '='){};
119 blastFile >> templateLength; // Get the template sequence length from flatfile
121 while(d=blastFile.get() != 'Q'){}; // Suck up everything else until we get to the start of the alignment
122 int queryStart, sbjctStart, queryEnd, sbjctEnd;
123 string queryLabel, sbjctLabel, query, sbjct;
125 blastFile >> queryLabel; queryLabel = 'Q' + queryLabel;
128 while(queryLabel == "Query:"){
129 blastFile >> queryStart >> query >> queryEnd;
131 while(d=blastFile.get() != 'S'){};
133 blastFile >> sbjctLabel >> sbjctStart >> sbjct >> sbjctEnd;
136 seqAstart = queryStart;
137 seqBstart = sbjctStart;
140 seqAaln += query; // concatenate each line of the sequence to what we already have
141 seqBaln += sbjct; // for the query and template (subject) sequence
143 blastFile >> queryLabel;
147 pairwiseLength = seqAaln.length();
149 for(int i=1;i<seqBstart;i++){ // Since the alignments don't always start at (1, 1), we need to pad
150 seqAaln = 'Z' + seqAaln; // the sequences so that they start at the same point
151 seqBaln = 'X' + seqBaln;
154 for(int i=seqBend+1;i<=templateLength;i++){ // since the sequences don't necessarily end at the same point, we
155 seqAaln += 'Z'; // again need ot pad the sequences so that they extend to the length
156 seqBaln += 'X'; // of the template sequence
160 //**************************************************************************************************/