1 # Copyright (C) 2013 Martin A. Hansen.
3 # This program is free software; you can redistribute it and/or
4 # modify it under the terms of the GNU General Public License
5 # as published by the Free Software Foundation; either version 2
6 # of the License, or (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 # http://www.gnu.org/copyleft/gpl.html
19 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
21 # This software is part of the Biopieces framework (www.biopieces.org).
23 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
27 # Class to calculate the Levenshtein distance between two
29 # http://en.wikipedia.org/wiki/Levenshtein_distance
33 def self.distance(s, t)
35 return t.length if s.length == 0;
36 return s.length if t.length == 0;
38 v0 = "\0" * (t.length + 1) * BYTES_IN_INT
39 v1 = "\0" * (t.length + 1) * BYTES_IN_INT
42 l.distance_C(s, t, s.length, t.length, v0, v1)
45 # >>>>>>>>>>>>>>> RubyInline C code <<<<<<<<<<<<<<<
48 # Macro for matching nucleotides including ambiguity codes.
50 #define MATCH(A,B) ((bitmap[A] & bitmap[B]) != 0)
53 # Bitmap for matching nucleotides including ambiguity codes.
54 # For each value bits are set from the left: bit pos 1 for A,
55 # bit pos 2 for T, bit pos 3 for C, and bit pos 4 for G.
58 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
59 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
60 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
61 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
62 0, 1,14, 4,11, 0, 0, 8, 7, 0, 0,10, 0, 5,15, 0,
63 0, 0, 9,12, 2, 2,13, 3, 0, 6, 0, 0, 0, 0, 0, 0,
64 0, 1,14, 4,11, 0, 0, 8, 7, 0, 0,10, 0, 5,15, 0,
65 0, 0, 9,12, 2, 2,13, 3, 0, 6, 0, 0, 0, 0, 0, 0,
66 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
67 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
68 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
69 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
70 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
71 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
72 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
73 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78 unsigned int min(unsigned int a, unsigned int b, unsigned int c)
93 VALUE _s_len, // string length
94 VALUE _t_len, // string length
95 VALUE _v0, // score vector
96 VALUE _v1 // score vector
99 char *s = (char *) StringValuePtr(_s);
100 char *t = (char *) StringValuePtr(_t);
101 unsigned int s_len = FIX2UINT(_s_len);
102 unsigned int t_len = FIX2UINT(_t_len);
103 unsigned int *v0 = (unsigned int *) StringValuePtr(_v0);
104 unsigned int *v1 = (unsigned int *) StringValuePtr(_v1);
108 unsigned int cost = 0;
110 for (i = 0; i < t_len + 1; i++)
113 for (i = 0; i < s_len; i++)
117 for (j = 0; j < t_len; j++)
119 cost = (MATCH(s[i], t[j])) ? 0 : 1;
120 v1[j + 1] = min(v1[j] + 1, v0[j + 1] + 1, v0[j] + cost);
123 for (j = 0; j < t_len + 1; j++)
127 return UINT2NUM(v1[t_len]);
133 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<