5 * Created by Pat Schloss on 12/15/08.
6 * Copyright 2008 Patrick D. Schloss. All rights reserved.
8 * This class is an Alignment child class that implements the Gotoh pairwise alignment algorithm as described in:
10 * Gotoh O. 1982. An improved algorithm for matching biological sequences. J. Mol. Biol. 162:705-8.
11 * Myers, EW & Miller, W. 1988. Optimal alignments in linear space. Comput Appl Biosci. 4:11-7.
13 * This method is nice because it allows for an affine gap penalty to be assessed, which is analogous to what is used
14 * in blast and is an alternative to Needleman-Wunsch, which only charges the same penalty for each gap position.
15 * Because this method typically has problems at the ends when two sequences do not full overlap, we employ a separate
16 * method to fix the ends (see Overlap class documentation)
21 #include "alignmentcell.hpp"
22 #include "overlap.hpp"
23 #include "alignment.hpp"
24 #include "gotohoverlap.hpp"
26 /**************************************************************************************************/
28 GotohOverlap::GotohOverlap(float gO, float gE, float f, float mm, int r) :
29 gapOpen(gO), gapExtend(gE), match(f), mismatch(mm), Alignment(r) {
32 for(int i=1;i<nCols;i++){ // we initialize the dynamic programming matrix by setting the pointers in
33 alignment[0][i].prevCell = 'l'; // the first row to the left
34 alignment[0][i].cValue = 0;
35 alignment[0][i].dValue = 0;
38 for(int i=1;i<nRows;i++){ // we initialize the dynamic programming matrix by setting the pointers in
39 alignment[i][0].prevCell = 'u'; // the first column upward
40 alignment[i][0].cValue = 0;
41 alignment[i][0].iValue = 0;
46 m->errorOut(e, "GotohOverlap", "GotohOverlap");
51 /**************************************************************************************************/
53 void GotohOverlap::align(string A, string B){
55 seqA = ' ' + A; lA = seqA.length(); // the algorithm requires that the first character be a dummy value
56 seqB = ' ' + B; lB = seqB.length(); // the algorithm requires that the first character be a dummy value
58 for(int i=1;i<lB;i++){ // the recursion here is shown in Webb and Miller, Fig. 1A. Note that
59 for(int j=1;j<lA;j++){ // if we need to conserve on space we should see Fig. 1B, which is linear
60 // in space, which I think is unnecessary
62 if(seqB[i] == seqA[j]) { diagonal = alignment[i-1][j-1].cValue + match; }
63 else { diagonal = alignment[i-1][j-1].cValue + mismatch; }
65 alignment[i][j].iValue = max(alignment[i][j-1].iValue, alignment[i][j-1].cValue + gapOpen) + gapExtend;
66 alignment[i][j].dValue = max(alignment[i-1][j].dValue, alignment[i-1][j].cValue + gapOpen) + gapExtend;
68 if(alignment[i][j].iValue > alignment[i][j].dValue){
69 if(alignment[i][j].iValue > diagonal){
70 alignment[i][j].cValue = alignment[i][j].iValue;
71 alignment[i][j].prevCell = 'l';
74 alignment[i][j].cValue = diagonal;
75 alignment[i][j].prevCell = 'd';
79 if(alignment[i][j].dValue > diagonal){
80 alignment[i][j].cValue = alignment[i][j].dValue;
81 alignment[i][j].prevCell = 'u';
84 alignment[i][j].cValue = diagonal;
85 alignment[i][j].prevCell = 'd';
92 over.setOverlap(alignment, lA, lB, 0); // Fix the gaps at the ends of the sequences
93 traceBack(); // Construct the alignment and set seqAaln and seqBaln
97 m->errorOut(e, "GotohOverlap", "align");
102 /**************************************************************************************************/