5 * Created by Pat Schloss on 12/15/08.
6 * Copyright 2008 Patrick D. Schloss. All rights reserved.
8 * This class is an Alignment child class that implements the Gotoh pairwise alignment algorithm as described in:
10 * Gotoh O. 1982. An improved algorithm for matching biological sequences. J. Mol. Biol. 162:705-8.
11 * Myers, EW & Miller, W. 1988. Optimal alignments in linear space. Comput Appl Biosci. 4:11-7.
13 * This method is nice because it allows for an affine gap penalty to be assessed, which is analogous to what is used
14 * in blast and is an alternative to Needleman-Wunsch, which only charges the same penalty for each gap position.
15 * Because this method typically has problems at the ends when two sequences do not full overlap, we employ a separate
16 * method to fix the ends (see Overlap class documentation)
20 #include "alignmentcell.hpp"
21 #include "alignment.hpp"
22 #include "overlap.hpp"
23 #include "needlemanoverlap.hpp"
25 /**************************************************************************************************/
27 NeedlemanOverlap::NeedlemanOverlap(float gO, float f, float mm, int r) :// note that we don't have a gap extend
28 gap(gO), match(f), mismatch(mm), Alignment(r) { // the gap openning penalty is assessed for
29 try { // every gapped position
30 for(int i=1;i<nCols;i++){
31 alignment[0][i].prevCell = 'l'; // initialize first row by pointing all poiters to the left
32 alignment[0][i].cValue = 0; // and the score to zero
35 for(int i=1;i<nRows;i++){
36 alignment[i][0].prevCell = 'u'; // initialize first column by pointing all poiters upwards
37 alignment[i][0].cValue = 0; // and the score to zero
42 m->errorOut(e, "NeedlemanOverlap", "NeedlemanOverlap");
46 /**************************************************************************************************/
48 NeedlemanOverlap::~NeedlemanOverlap(){ /* do nothing */ }
50 /**************************************************************************************************/
52 void NeedlemanOverlap::align(string A, string B){
55 seqA = ' ' + A; lA = seqA.length(); // algorithm requires a dummy space at the beginning of each string
56 seqB = ' ' + B; lB = seqB.length(); // algorithm requires a dummy space at the beginning of each string
58 if (lA > nRows) { m->mothurOut("One of your candidate sequences is longer than you longest template sequence. Your longest template sequence is " + toString(nRows) + ". Your candidate is " + toString(lA) + "."); m->mothurOutEndLine(); }
60 for(int i=1;i<lB;i++){ // This code was largely translated from Perl code provided in Ex 3.1
62 for(int j=1;j<lA;j++){ // of the O'Reilly BLAST book. I found that the example output had a
66 if(seqB[i] == seqA[j]) { diagonal = alignment[i-1][j-1].cValue + match; }
67 else { diagonal = alignment[i-1][j-1].cValue + mismatch; }
69 float up = alignment[i-1][j].cValue + gap;
70 float left = alignment[i][j-1].cValue + gap;
74 alignment[i][j].cValue = diagonal;
75 alignment[i][j].prevCell = 'd';
78 alignment[i][j].cValue = left;
79 alignment[i][j].prevCell = 'l';
84 alignment[i][j].cValue = up;
85 alignment[i][j].prevCell = 'u';
88 alignment[i][j].cValue = left;
89 alignment[i][j].prevCell = 'l';
96 over.setOverlap(alignment, lA, lB, 0); // Fix gaps at the beginning and end of the sequences
97 traceBack(); // Traceback the alignment to populate seqAaln and seqBaln
100 catch(exception& e) {
101 m->errorOut(e, "NeedlemanOverlap", "align");
106 /**************************************************************************************************/
108 void NeedlemanOverlap::alignPrimer(string A, string B){
111 seqA = ' ' + A; lA = seqA.length(); // algorithm requires a dummy space at the beginning of each string
112 seqB = ' ' + B; lB = seqB.length(); // algorithm requires a dummy space at the beginning of each string
114 if (lA > nRows) { m->mothurOut("One of your candidate sequences is longer than you longest template sequence. Your longest template sequence is " + toString(nRows) + ". Your candidate is " + toString(lA) + "."); m->mothurOutEndLine(); }
116 for(int i=1;i<lB;i++){ // This code was largely translated from Perl code provided in Ex 3.1
118 for(int j=1;j<lA;j++){ // of the O'Reilly BLAST book. I found that the example output had a
122 if(isEquivalent(seqB[i],seqA[j])) { diagonal = alignment[i-1][j-1].cValue + match; }
123 else { diagonal = alignment[i-1][j-1].cValue + mismatch; }
125 float up = alignment[i-1][j].cValue + gap;
126 float left = alignment[i][j-1].cValue + gap;
129 if(diagonal >= left){
130 alignment[i][j].cValue = diagonal;
131 alignment[i][j].prevCell = 'd';
134 alignment[i][j].cValue = left;
135 alignment[i][j].prevCell = 'l';
140 alignment[i][j].cValue = up;
141 alignment[i][j].prevCell = 'u';
144 alignment[i][j].cValue = left;
145 alignment[i][j].prevCell = 'l';
152 over.setOverlap(alignment, lA, lB, 0); // Fix gaps at the beginning and end of the sequences
153 traceBack(); // Traceback the alignment to populate seqAaln and seqBaln
156 catch(exception& e) {
157 m->errorOut(e, "NeedlemanOverlap", "align");
162 //********************************************************************/
163 bool NeedlemanOverlap::isEquivalent(char oligo, char seq){
168 oligo = toupper(oligo);
172 if(oligo == 'A' && (seq != 'A' && seq != 'M' && seq != 'R' && seq != 'W' && seq != 'D' && seq != 'H' && seq != 'V')) { same = false; }
173 else if(oligo == 'C' && (seq != 'C' && seq != 'Y' && seq != 'M' && seq != 'S' && seq != 'B' && seq != 'H' && seq != 'V')) { same = false; }
174 else if(oligo == 'G' && (seq != 'G' && seq != 'R' && seq != 'K' && seq != 'S' && seq != 'B' && seq != 'D' && seq != 'V')) { same = false; }
175 else if(oligo == 'T' && (seq != 'T' && seq != 'Y' && seq != 'K' && seq != 'W' && seq != 'B' && seq != 'D' && seq != 'H')) { same = false; }
176 else if((oligo == '.' || oligo == '-')) { same = false; }
177 else if((oligo == 'N' || oligo == 'I') && (seq == 'N')) { same = false; }
178 else if(oligo == 'R' && (seq != 'A' && seq != 'G')) { same = false; }
179 else if(oligo == 'Y' && (seq != 'C' && seq != 'T')) { same = false; }
180 else if(oligo == 'M' && (seq != 'C' && seq != 'A')) { same = false; }
181 else if(oligo == 'K' && (seq != 'T' && seq != 'G')) { same = false; }
182 else if(oligo == 'W' && (seq != 'T' && seq != 'A')) { same = false; }
183 else if(oligo == 'S' && (seq != 'C' && seq != 'G')) { same = false; }
184 else if(oligo == 'B' && (seq != 'C' && seq != 'T' && seq != 'G')) { same = false; }
185 else if(oligo == 'D' && (seq != 'A' && seq != 'T' && seq != 'G')) { same = false; }
186 else if(oligo == 'H' && (seq != 'A' && seq != 'T' && seq != 'C')) { same = false; }
187 else if(oligo == 'V' && (seq != 'A' && seq != 'C' && seq != 'G')) { same = false; }
194 catch(exception& e) {
195 m->errorOut(e, "TrimOligos", "countDiffs");
199 /**************************************************************************************************/