]> git.donarmstrong.com Git - mothur.git/blob - needlemanoverlap.cpp
changes while testing
[mothur.git] / needlemanoverlap.cpp
1 /*
2  *  needleman.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/15/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  *      This class is an Alignment child class that implements the Gotoh pairwise alignment algorithm as described in:
9  *              
10  *              Gotoh O. 1982.  An improved algorithm for matching biological sequences.  J. Mol. Biol.  162:705-8.
11  *              Myers, EW & Miller, W.  1988.  Optimal alignments in linear space.  Comput Appl Biosci. 4:11-7.
12  *
13  *      This method is nice because it allows for an affine gap penalty to be assessed, which is analogous to what is used
14  *      in blast and is an alternative to Needleman-Wunsch, which only charges the same penalty for each gap position.
15  *      Because this method typically has problems at the ends when two sequences do not full overlap, we employ a separate
16  *      method to fix the ends (see Overlap class documentation)
17  *
18  */
19
20 #include "alignmentcell.hpp"
21 #include "alignment.hpp"
22 #include "overlap.hpp"
23 #include "needlemanoverlap.hpp"
24
25 /**************************************************************************************************/
26
27 NeedlemanOverlap::NeedlemanOverlap(float gO, float f, float mm, int r) ://      note that we don't have a gap extend
28 gap(gO), match(f), mismatch(mm), Alignment(r) {                                                 //      the gap openning penalty is assessed for
29         try {                                                                                                                                   //      every gapped position
30                 for(int i=1;i<nCols;i++){
31                         alignment[0][i].prevCell = 'l';                                 //      initialize first row by pointing all poiters to the left
32                         alignment[0][i].cValue = 0;                                             //      and the score to zero
33                 }
34                 
35                 for(int i=1;i<nRows;i++){
36                         alignment[i][0].prevCell = 'u';                                 //      initialize first column by pointing all poiters upwards
37                         alignment[i][0].cValue = 0;                                             //      and the score to zero
38                 }
39         
40         }
41         catch(exception& e) {
42                 m->errorOut(e, "NeedlemanOverlap", "NeedlemanOverlap");
43                 exit(1);
44         }
45 }
46 /**************************************************************************************************/
47
48 NeedlemanOverlap::~NeedlemanOverlap(){  /*      do nothing      */      }
49
50 /**************************************************************************************************/
51
52 void NeedlemanOverlap::align(string A, string B){
53         try {
54         
55                 seqA = ' ' + A; lA = seqA.length();             //      algorithm requires a dummy space at the beginning of each string
56                 seqB = ' ' + B; lB = seqB.length();             //      algorithm requires a dummy space at the beginning of each string
57
58                 if (lA > nRows) { m->mothurOut("One of your candidate sequences is longer than you longest template sequence. Your longest template sequence is " + toString(nRows) + ". Your candidate is " + toString(lA) + "."); m->mothurOutEndLine();  }
59                 
60                 for(int i=1;i<lB;i++){                                  //      This code was largely translated from Perl code provided in Ex 3.1 
61                 
62                         for(int j=1;j<lA;j++){                          //      of the O'Reilly BLAST book.  I found that the example output had a
63         
64                                 //      number of errors
65                                 float diagonal;
66                                 if(seqB[i] == seqA[j])  {       diagonal = alignment[i-1][j-1].cValue + match;          }
67                                 else                                    {       diagonal = alignment[i-1][j-1].cValue + mismatch;       }
68                         
69                                 float up        = alignment[i-1][j].cValue + gap;
70                                 float left      = alignment[i][j-1].cValue + gap;
71                                 
72                                 if(diagonal >= up){
73                                         if(diagonal >= left){
74                                                 alignment[i][j].cValue = diagonal;
75                                                 alignment[i][j].prevCell = 'd';
76                                         }
77                                         else{
78                                                 alignment[i][j].cValue = left;
79                                                 alignment[i][j].prevCell = 'l';
80                                         }
81                                 }
82                                 else{
83                                         if(up >= left){
84                                                 alignment[i][j].cValue = up;
85                                                 alignment[i][j].prevCell = 'u';
86                                         }
87                                         else{
88                                                 alignment[i][j].cValue = left;
89                                                 alignment[i][j].prevCell = 'l';
90                                         }
91                                 }
92                         }
93                 }
94
95                 Overlap over;                                           
96                 over.setOverlap(alignment, lA, lB, 0);          //      Fix gaps at the beginning and end of the sequences
97                 traceBack();                                                            //      Traceback the alignment to populate seqAaln and seqBaln
98         
99         }
100         catch(exception& e) {
101                 m->errorOut(e, "NeedlemanOverlap", "align");
102                 exit(1);
103         }
104
105 }
106 /**************************************************************************************************/
107
108 void NeedlemanOverlap::alignPrimer(string A, string B){
109         try {
110         
111                 seqA = ' ' + A; lA = seqA.length();             //      algorithm requires a dummy space at the beginning of each string
112                 seqB = ' ' + B; lB = seqB.length();             //      algorithm requires a dummy space at the beginning of each string
113         
114                 if (lA > nRows) { m->mothurOut("One of your candidate sequences is longer than you longest template sequence. Your longest template sequence is " + toString(nRows) + ". Your candidate is " + toString(lA) + "."); m->mothurOutEndLine();  }
115                 
116                 for(int i=1;i<lB;i++){                                  //      This code was largely translated from Perl code provided in Ex 3.1
117             
118                         for(int j=1;j<lA;j++){                          //      of the O'Reilly BLAST book.  I found that the example output had a
119                 
120                                 //      number of errors
121                                 float diagonal;
122                                 if(isEquivalent(seqB[i],seqA[j]))       {       diagonal = alignment[i-1][j-1].cValue + match;          }
123                                 else                                {   diagonal = alignment[i-1][j-1].cValue + mismatch;       }
124                 
125                                 float up        = alignment[i-1][j].cValue + gap;
126                                 float left      = alignment[i][j-1].cValue + gap;
127                                 
128                                 if(diagonal >= up){
129                                         if(diagonal >= left){
130                                                 alignment[i][j].cValue = diagonal;
131                                                 alignment[i][j].prevCell = 'd';
132                                         }
133                                         else{
134                                                 alignment[i][j].cValue = left;
135                                                 alignment[i][j].prevCell = 'l';
136                                         }
137                                 }
138                                 else{
139                                         if(up >= left){
140                                                 alignment[i][j].cValue = up;
141                                                 alignment[i][j].prevCell = 'u';
142                                         }
143                                         else{
144                                                 alignment[i][j].cValue = left;
145                                                 alignment[i][j].prevCell = 'l';
146                                         }
147                                 }
148                         }
149                 }
150         
151                 Overlap over;
152                 over.setOverlap(alignment, lA, lB, 0);          //      Fix gaps at the beginning and end of the sequences
153                 traceBack();                                                            //      Traceback the alignment to populate seqAaln and seqBaln
154         
155         }
156         catch(exception& e) {
157                 m->errorOut(e, "NeedlemanOverlap", "alignPrimer");
158                 exit(1);
159         }
160     
161 }
162 //********************************************************************/
163 bool NeedlemanOverlap::isEquivalent(char oligo, char seq){
164         try {
165                 
166         bool same = true;
167                                         
168         oligo = toupper(oligo);
169         seq = toupper(seq);
170         
171         if(oligo != seq){
172             if(oligo == 'A' && (seq != 'A' && seq != 'M' && seq != 'R' && seq != 'W' && seq != 'D' && seq != 'H' && seq != 'V'))       {        same = false;   }
173             else if(oligo == 'C' && (seq != 'C' && seq != 'Y' && seq != 'M' && seq != 'S' && seq != 'B' && seq != 'H' && seq != 'V'))       {   same = false;   }
174             else if(oligo == 'G' && (seq != 'G' && seq != 'R' && seq != 'K' && seq != 'S' && seq != 'B' && seq != 'D' && seq != 'V'))       {   same = false;   }
175             else if(oligo == 'T' && (seq != 'T' && seq != 'Y' && seq != 'K' && seq != 'W' && seq != 'B' && seq != 'D' && seq != 'H'))       {   same = false;   }
176             else if((oligo == '.' || oligo == '-'))           { same = false;   }
177             else if((oligo == 'N' || oligo == 'I') && (seq == 'N'))                         {   same = false;   }
178             else if(oligo == 'R' && (seq != 'A' && seq != 'G'))                        {        same = false;   }
179             else if(oligo == 'Y' && (seq != 'C' && seq != 'T'))                        {        same = false;   }
180             else if(oligo == 'M' && (seq != 'C' && seq != 'A'))                        {        same = false;   }
181             else if(oligo == 'K' && (seq != 'T' && seq != 'G'))                        {        same = false;   }
182             else if(oligo == 'W' && (seq != 'T' && seq != 'A'))                        {        same = false;   }
183             else if(oligo == 'S' && (seq != 'C' && seq != 'G'))                        {        same = false;   }
184             else if(oligo == 'B' && (seq != 'C' && seq != 'T' && seq != 'G'))       {   same = false;   }
185             else if(oligo == 'D' && (seq != 'A' && seq != 'T' && seq != 'G'))       {   same = false;   }
186             else if(oligo == 'H' && (seq != 'A' && seq != 'T' && seq != 'C'))       {   same = false;   }
187             else if(oligo == 'V' && (seq != 'A' && seq != 'C' && seq != 'G'))       {   same = false;   }
188         }
189
190                 
191                 
192                 return same;
193         }
194         catch(exception& e) {
195                 m->errorOut(e, "TrimOligos", "countDiffs");
196                 exit(1);
197         }
198 }
199 /**************************************************************************************************/
200