matches = []
if block_given?
- while result = self.track(pattern, offset, max_mismatches, max_insertions, max_deletions)
+ while result = track_C(self.seq, self.length, pattern, offset, max_mismatches, max_insertions, max_deletions)
yield Match.new(result.first, result.last, self.seq[result.first ... result.first + result.last])
offset = result.first + 1
end
else
- while result = self.track(pattern, offset, max_mismatches, max_insertions, max_deletions)
+ while result = track_C(self.seq, self.length, pattern, offset, max_mismatches, max_insertions, max_deletions)
matches << Match.new(result.first, result.last, self.seq[result.first ... result.first + result.last])
offset = result.first + 1
end
# Macro for matching nucleotides including ambiguity codes.
builder.prefix %{
- #define MATCH(A,B) ((bitmap[(unsigned short) A] & bitmap[(unsigned short) B]) != 0)
+ #define MATCH(A,B) ((bitmap[A] & bitmap[B]) != 0)
}
# Bitmap for matching nucleotides including ambiguity codes.
# For each value bits are set from the left: bit pos 1 for A,
# bit pos 2 for T, bit pos 3 for C, and bit pos 4 for G.
builder.prefix %{
- unsigned short bitmap[256] = {
+ char bitmap[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
}
- # Backtrack algorithm for matching a pattern (p) starting in a sequence (s) allowing for mm
+ # Backtrack algorithm for matching a pattern (p) starting in a sequence (s) allowing for mis
# mismatches, ins insertions and del deletions. ss is the start of the sequence, used only for
# reporting the match endpoints.
builder.prefix %{
- unsigned int backtrack(char* ss, char* s, char* p, int mm, int ins, int del)
+ unsigned int backtrack(
+ char *ss, // Sequence start
+ char *s, // Sequence
+ char *p, // Pattern
+ int mis, // Max mismatches
+ int ins, // Max insertions
+ int del // Max deletions
+ )
{
- unsigned int r = 0;
+ unsigned int r = 0;
- while (*s && MATCH(*s, *p)) ++s, ++p; // OK to always match longest segment
+ while (*s && MATCH(*s, *p)) ++s, ++p; // OK to always match longest segment
- if (!*p)
- return (unsigned int) (s - ss);
- else
- {
- if (mm && *s && *p && (r = backtrack(ss, s + 1, p + 1, mm - 1, ins, del))) return r;
- if (ins && *p && (r = backtrack(ss, s, p + 1, mm, ins - 1, del))) return r;
- if (del && *s && (r = backtrack(ss, s + 1, p, mm, ins, del - 1))) return r;
- }
+ if (!*p)
+ return (unsigned int) (s - ss);
+ else
+ {
+ if (mis && *s && *p && (r = backtrack(ss, s + 1, p + 1, mis - 1, ins, del))) return r;
+ if (ins && *p && (r = backtrack(ss, s, p + 1, mis, ins - 1, del))) return r;
+ if (del && *s && (r = backtrack(ss, s + 1, p, mis, ins, del - 1))) return r;
+ }
- return 0;
+ return 0;
}
}
- # Find pattern (p) in a sequence (@seq) starting at pos, with at most mm mismatches, ins
+ # Find pattern (p) in a sequence (s) starting at pos, with at most mis mismatches, ins
# insertions and del deletions.
builder.c %{
- static VALUE track(char* p, unsigned int pos, int mm, int ins, int del)
+ VALUE track_C(
+ VALUE _s, // Sequence
+ VALUE _len, // Sequence length
+ VALUE _p, // Pattern
+ VALUE _pos, // Start position
+ VALUE _mis, // Maximum mismatches
+ VALUE _ins, // Maximum insertions
+ VALUE _del // Maximum deletions
+ )
{
- VALUE seq = rb_ivar_get(self, id_seq);
- char* s = StringValuePtr(seq);
- char* ss = s;
- unsigned int e;
- VALUE tuple;
-
- if (pos < strlen(s))
+ char *s = StringValuePtr(_s);
+ char *p = StringValuePtr(_p);
+ unsigned int len = FIX2UINT(_len);
+ unsigned int pos = FIX2UINT(_pos);
+ unsigned int mis = FIX2UINT(_mis);
+ unsigned int ins = FIX2UINT(_ins);
+ unsigned int del = FIX2UINT(_del);
+
+ char *ss = s;
+ unsigned int e = 0;
+ VALUE tuple;
+
+ if (pos < len)
+ {
+ s += pos;
+
+ while (*s)
{
- s += pos;
-
- while (*s)
- {
- if ((e = backtrack(ss, s, p, mm, ins, del)))
- {
- tuple = rb_ary_new();
- rb_ary_push(tuple, INT2FIX((int) (s - ss)));
- rb_ary_push(tuple, INT2FIX((int) e - (s - ss)));
- return tuple;
- }
-
- s++;
- }
+ if ((e = backtrack(ss, s, p, mis, ins, del)))
+ {
+ tuple = rb_ary_new();
+ rb_ary_push(tuple, INT2FIX((int) (s - ss)));
+ rb_ary_push(tuple, INT2FIX((int) e - (s - ss)));
+ return tuple;
+ }
+
+ s++;
}
+ }
- return Qnil;
- }
+ return Qnil;
+ }
}
end