1 # Copyright (C) 2013 Martin A. Hansen.
3 # This program is free software; you can redistribute it and/or
4 # modify it under the terms of the GNU General Public License
5 # as published by the Free Software Foundation; either version 2
6 # of the License, or (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 # http://www.gnu.org/copyleft/gpl.html
19 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
21 # This software is part of the Biopieces framework (www.biopieces.org).
23 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
26 require 'maasha/seq/ambiguity'
28 # Class to calculate the Levenshtein distance between two
30 # http://en.wikipedia.org/wiki/Levenshtein_distance
36 def self.distance(s, t)
38 return t.length if s.length == 0;
39 return s.length if t.length == 0;
41 v0 = "\0" * (t.length + 1) * BYTES_IN_INT
42 v1 = "\0" * (t.length + 1) * BYTES_IN_INT
45 l.distance_C(s, t, s.length, t.length, v0, v1)
48 # >>>>>>>>>>>>>>> RubyInline C code <<<<<<<<<<<<<<<
51 add_ambiguity_macro(builder)
54 unsigned int min(unsigned int a, unsigned int b, unsigned int c)
69 VALUE _s_len, // string length
70 VALUE _t_len, // string length
71 VALUE _v0, // score vector
72 VALUE _v1 // score vector
75 char *s = (char *) StringValuePtr(_s);
76 char *t = (char *) StringValuePtr(_t);
77 unsigned int s_len = FIX2UINT(_s_len);
78 unsigned int t_len = FIX2UINT(_t_len);
79 unsigned int *v0 = (unsigned int *) StringValuePtr(_v0);
80 unsigned int *v1 = (unsigned int *) StringValuePtr(_v1);
84 unsigned int cost = 0;
86 for (i = 0; i < t_len + 1; i++)
89 for (i = 0; i < s_len; i++)
93 for (j = 0; j < t_len; j++)
95 cost = (MATCH(s[i], t[j])) ? 0 : 1;
96 v1[j + 1] = min(v1[j] + 1, v0[j + 1] + 1, v0[j] + cost);
99 for (j = 0; j < t_len + 1; j++)
103 return UINT2NUM(v1[t_len]);
109 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<