matches = []
- while result = track_C(self.seq, self.length, pattern, offset, max_mismatches, max_insertions, max_deletions)
- pos, length, mis, ins, del = result
-
- match = Match.new(pos, length, mis, ins, del, self.seq[pos ... pos + length])
-
- if block_given?
- yield match
- else
- matches << match
+ if block_given?
+ while result = track_C(self.seq, self.length, pattern, offset, max_mismatches, max_insertions, max_deletions)
+ yield Match.new(result.first, result.last, self.seq[result.first ... result.first + result.last])
+ offset = result.first + 1
+ end
+ else
+ while result = track_C(self.seq, self.length, pattern, offset, max_mismatches, max_insertions, max_deletions)
+ matches << Match.new(result.first, result.last, self.seq[result.first ... result.first + result.last])
+ offset = result.first + 1
end
-
- offset = match.pos + 1
end
return matches.empty? ? nil : matches unless block_given?
private
inline do |builder|
+ builder.add_static "id_seq", 'rb_intern("@seq")', "ID"
+
# Macro for matching nucleotides including ambiguity codes.
builder.prefix %{
#define MATCH(A,B) ((bitmap[A] & bitmap[B]) != 0)
}
- # Defining a struct for returning match information.
- builder.prefix %{
- typedef struct
- {
- unsigned int end;
- unsigned int mis;
- unsigned int ins;
- unsigned int del;
- } match;
- }
-
# Bitmap for matching nucleotides including ambiguity codes.
# For each value bits are set from the left: bit pos 1 for A,
# bit pos 2 for T, bit pos 3 for C, and bit pos 4 for G.
# mismatches, ins insertions and del deletions. ss is the start of the sequence, used only for
# reporting the match endpoints.
builder.prefix %{
- match* backtrack(
- char *ss, // Sequence start
- char *s, // Sequence
- char *p, // Pattern
- unsigned int mis, // Max mismatches
- unsigned int ins, // Max insertions
- unsigned int del, // Max deletions
- int state // 1 deletion, -1 insertion, 0 match or mismatch
+ unsigned int backtrack(
+ char *ss, // Sequence start
+ char *s, // Sequence
+ char *p, // Pattern
+ int mis, // Max mismatches
+ int ins, // Max insertions
+ int del // Max deletions
)
{
- match found = {0, 0, 0, 0};
- match *f = &found;
+ unsigned int r = 0;
while (*s && MATCH(*s, *p)) ++s, ++p; // OK to always match longest segment
if (!*p)
- {
- f->end = (unsigned int) (s - ss);
- f->mis = mis;
- f->ins = ins;
- f->del = del;
-
- return f;
- }
+ return (unsigned int) (s - ss);
else
{
- if (mis && *s && *p && (f = backtrack(ss, s + 1, p + 1, mis - 1, ins, del, 0))) return f;
- if (ins && *p && (state != 1) && (f = backtrack(ss, s, p + 1, mis, ins - 1, del, -1))) return f;
- if (del && *s && (state != -1) && (f = backtrack(ss, s + 1, p, mis, ins, del - 1, 1))) return f;
+ if (mis && *s && *p && (r = backtrack(ss, s + 1, p + 1, mis - 1, ins, del))) return r;
+ if (ins && *p && (r = backtrack(ss, s, p + 1, mis, ins - 1, del))) return r;
+ if (del && *s && (r = backtrack(ss, s + 1, p, mis, ins, del - 1))) return r;
}
- return NULL;
+ return 0;
}
}
# insertions and del deletions.
builder.c %{
VALUE track_C(
- VALUE _s, // Sequence
- VALUE _len, // Sequence length
- VALUE _p, // Pattern
- VALUE _pos, // Start position
- VALUE _max_mis, // Maximum mismatches
- VALUE _max_ins, // Maximum insertions
- VALUE _max_del // Maximum deletions
+ VALUE _s, // Sequence
+ VALUE _len, // Sequence length
+ VALUE _p, // Pattern
+ VALUE _pos, // Start position
+ VALUE _mis, // Maximum mismatches
+ VALUE _ins, // Maximum insertions
+ VALUE _del // Maximum deletions
)
{
- char *s = StringValuePtr(_s);
- char *p = StringValuePtr(_p);
- unsigned int len = FIX2UINT(_len);
- unsigned int pos = FIX2UINT(_pos);
- unsigned int max_mis = FIX2UINT(_max_mis);
- unsigned int max_ins = FIX2UINT(_max_ins);
- unsigned int max_del = FIX2UINT(_max_del);
-
- char *ss = s;
- match *f = NULL;
- unsigned int mbeg = 0;
- unsigned int mlen = 0;
- unsigned int mmis = 0;
- unsigned int mins = 0;
- unsigned int mdel = 0;
- int state = 0;
- VALUE ary = 0;
-
- if (pos < len) // FIXME: pos < len - plen + 1
+ char *s = StringValuePtr(_s);
+ char *p = StringValuePtr(_p);
+ unsigned int len = FIX2UINT(_len);
+ unsigned int pos = FIX2UINT(_pos);
+ unsigned int mis = FIX2UINT(_mis);
+ unsigned int ins = FIX2UINT(_ins);
+ unsigned int del = FIX2UINT(_del);
+
+ char *ss = s;
+ unsigned int e = 0;
+ VALUE tuple;
+
+ if (pos < len)
{
s += pos;
while (*s)
{
- if ((f = backtrack(ss, s, p, max_mis, max_ins, max_del, state)))
+ if ((e = backtrack(ss, s, p, mis, ins, del)))
{
- mbeg = (s - ss);
- mlen = f->end - mbeg;
- mmis = max_mis - f->mis;
- mins = max_ins - f->ins;
- mdel = max_del - f->del;
-
- ary = rb_ary_new();
- rb_ary_push(ary, INT2FIX(mbeg));
- rb_ary_push(ary, INT2FIX(mlen));
- rb_ary_push(ary, INT2FIX(mmis));
- rb_ary_push(ary, INT2FIX(mins));
- rb_ary_push(ary, INT2FIX(mdel));
- return ary;
+ tuple = rb_ary_new();
+ rb_ary_push(tuple, INT2FIX((int) (s - ss)));
+ rb_ary_push(tuple, INT2FIX((int) e - (s - ss)));
+ return tuple;
}
s++;
# Class containing match information.
class Match
- attr_reader :pos, :length, :mis, :ins, :del, :match
+ attr_reader :pos, :length, :match
- def initialize(pos, length, mis, ins, del, match)
+ def initialize(pos, length, match)
@pos = pos
@length = length
- @mis = mis
- @ins = ins
- @del = del
@match = match
end
def to_s
- "#{@pos}:#{@length}:#{@mis}:#{@ins}:#{@del}:#{@match}"
+ "#{pos}:#{length}:#{match}"
end
end
end