matches = []
- if block_given?
- while result = track_C(self.seq, self.length, pattern, offset, max_mismatches, max_insertions, max_deletions)
- yield Match.new(result.first, result.last, self.seq[result.first ... result.first + result.last])
- offset = result.first + 1
- end
- else
- while result = track_C(self.seq, self.length, pattern, offset, max_mismatches, max_insertions, max_deletions)
- matches << Match.new(result.first, result.last, self.seq[result.first ... result.first + result.last])
- offset = result.first + 1
+ while result = track_C(self.seq, self.length, pattern, offset, max_mismatches, max_insertions, max_deletions)
+ match = Match.new(result[0], result[1], result[2], result[3], result[4], self.seq[result[0] ... result[0] + result[1]])
+
+ if block_given?
+ yield match
+ else
+ matches << match
end
+ offset = match.pos + 1
end
return matches.empty? ? nil : matches unless block_given?
private
inline do |builder|
- builder.add_static "id_seq", 'rb_intern("@seq")', "ID"
-
# Macro for matching nucleotides including ambiguity codes.
builder.prefix %{
#define MATCH(A,B) ((bitmap[A] & bitmap[B]) != 0)
}
+ # Defining a struct for returning match information.
+ builder.prefix %{
+ typedef struct
+ {
+ unsigned int end;
+ unsigned int mis;
+ unsigned int ins;
+ unsigned int del;
+ } match;
+ }
+
# Bitmap for matching nucleotides including ambiguity codes.
# For each value bits are set from the left: bit pos 1 for A,
# bit pos 2 for T, bit pos 3 for C, and bit pos 4 for G.
# mismatches, ins insertions and del deletions. ss is the start of the sequence, used only for
# reporting the match endpoints.
builder.prefix %{
- unsigned int backtrack(
- char *ss, // Sequence start
- char *s, // Sequence
- char *p, // Pattern
- int mis, // Max mismatches
- int ins, // Max insertions
- int del // Max deletions
+ match* backtrack(
+ char *ss, // Sequence start
+ char *s, // Sequence
+ char *p, // Pattern
+ unsigned int mis, // Max mismatches
+ unsigned int ins, // Max insertions
+ unsigned int del // Max deletions
)
{
- unsigned int r = 0;
+ match found = {0, 0, 0, 0};
+ match *f = &found;
while (*s && MATCH(*s, *p)) ++s, ++p; // OK to always match longest segment
if (!*p)
- return (unsigned int) (s - ss);
+ {
+ f->end = (unsigned int) (s - ss);
+ f->mis = mis;
+ f->ins = ins;
+ f->del = del;
+
+ return f;
+ }
else
{
- if (mis && *s && *p && (r = backtrack(ss, s + 1, p + 1, mis - 1, ins, del))) return r;
- if (ins && *p && (r = backtrack(ss, s, p + 1, mis, ins - 1, del))) return r;
- if (del && *s && (r = backtrack(ss, s + 1, p, mis, ins, del - 1))) return r;
+ if (mis && *s && *p && (f = backtrack(ss, s + 1, p + 1, mis - 1, ins, del))) return f;
+ if (ins && *p && (f = backtrack(ss, s, p + 1, mis, ins - 1, del))) return f;
+ if (del && *s && (f = backtrack(ss, s + 1, p, mis, ins, del - 1))) return f;
}
- return 0;
+ return NULL;
}
}
# insertions and del deletions.
builder.c %{
VALUE track_C(
- VALUE _s, // Sequence
- VALUE _len, // Sequence length
- VALUE _p, // Pattern
- VALUE _pos, // Start position
- VALUE _mis, // Maximum mismatches
- VALUE _ins, // Maximum insertions
- VALUE _del // Maximum deletions
+ VALUE _s, // Sequence
+ VALUE _len, // Sequence length
+ VALUE _p, // Pattern
+ VALUE _pos, // Start position
+ VALUE _max_mis, // Maximum mismatches
+ VALUE _max_ins, // Maximum insertions
+ VALUE _max_del // Maximum deletions
)
{
- char *s = StringValuePtr(_s);
- char *p = StringValuePtr(_p);
- unsigned int len = FIX2UINT(_len);
- unsigned int pos = FIX2UINT(_pos);
- unsigned int mis = FIX2UINT(_mis);
- unsigned int ins = FIX2UINT(_ins);
- unsigned int del = FIX2UINT(_del);
-
- char *ss = s;
- unsigned int e = 0;
- VALUE tuple;
+ char *s = StringValuePtr(_s);
+ char *p = StringValuePtr(_p);
+ unsigned int len = FIX2UINT(_len);
+ unsigned int pos = FIX2UINT(_pos);
+ unsigned int max_mis = FIX2UINT(_max_mis);
+ unsigned int max_ins = FIX2UINT(_max_ins);
+ unsigned int max_del = FIX2UINT(_max_del);
+
+ char *ss = s;
+ match *f = NULL;
+ unsigned int mbeg = 0;
+ unsigned int mlen = 0;
+ unsigned int mmis = 0;
+ unsigned int mins = 0;
+ unsigned int mdel = 0;
+ VALUE ary = 0;
if (pos < len)
{
while (*s)
{
- if ((e = backtrack(ss, s, p, mis, ins, del)))
+ if ((f = backtrack(ss, s, p, max_mis, max_ins, max_del)))
{
- tuple = rb_ary_new();
- rb_ary_push(tuple, INT2FIX((int) (s - ss)));
- rb_ary_push(tuple, INT2FIX((int) e - (s - ss)));
- return tuple;
+ mbeg = (s - ss);
+ mlen = f->end - mbeg;
+ mmis = max_mis - f->mis;
+ mins = max_ins - f->ins;
+ mdel = max_del - f->del;
+
+ ary = rb_ary_new();
+ rb_ary_push(ary, INT2FIX(mbeg));
+ rb_ary_push(ary, INT2FIX(mlen));
+ rb_ary_push(ary, INT2FIX(mmis));
+ rb_ary_push(ary, INT2FIX(mins));
+ rb_ary_push(ary, INT2FIX(mdel));
+ return ary;
}
s++;
# Class containing match information.
class Match
- attr_reader :pos, :length, :match
+ attr_reader :pos, :length, :mis, :ins, :del, :match
- def initialize(pos, length, match)
+ def initialize(pos, length, mis, ins, del, match)
@pos = pos
@length = length
+ @mis = mis
+ @ins = ins
+ @del = del
@match = match
end
def to_s
- "#{pos}:#{length}:#{match}"
+ "#{@pos}:#{@length}:#{@mis}:#{@ins}:#{@del}:#{@match}"
end
end
end
end
def test_BackTrack_patscan_perfect_left_is_ok
- assert_equal("0:7:tacgatg", @seq.patscan("TACGATG").first.to_s)
+ assert_equal("0:7:0:0:0:tacgatg", @seq.patscan("TACGATG").first.to_s)
end
def test_BackTrack_patscan_perfect_right_is_ok
- assert_equal("13:7:tgcacgg", @seq.patscan("TGCACGG").first.to_s)
+ assert_equal("13:7:0:0:0:tgcacgg", @seq.patscan("TGCACGG").first.to_s)
end
def test_BackTrack_patscan_ambiguity_is_ok
- assert_equal("13:7:tgcacgg", @seq.patscan("TGCACNN").first.to_s)
+ assert_equal("13:7:0:0:0:tgcacgg", @seq.patscan("TGCACNN").first.to_s)
end
def test_BackTrack_patscan_pos_is_ok
- assert_equal("10:1:g", @seq.patscan("N", 10).first.to_s)
- assert_equal("19:1:g", @seq.patscan("N", 10).last.to_s)
+ assert_equal("10:1:0:0:0:g", @seq.patscan("N", 10).first.to_s)
+ assert_equal("19:1:0:0:0:g", @seq.patscan("N", 10).last.to_s)
end
def test_BackTrack_patscan_mis_left_is_ok
- assert_equal("0:7:tacgatg", @seq.patscan("Aacgatg", 0, 1).first.to_s)
+ assert_equal("0:7:1:0:0:tacgatg", @seq.patscan("Aacgatg", 0, 1).first.to_s)
end
def test_BackTrack_patscan_mis_right_is_ok
- assert_equal("13:7:tgcacgg", @seq.patscan("tgcacgA", 0, 1).first.to_s)
+ assert_equal("13:7:1:0:0:tgcacgg", @seq.patscan("tgcacgA", 0, 1).first.to_s)
end
def test_BackTrack_patscan_ins_left_is_ok
- assert_equal("0:7:tacgatg", @seq.patscan("Atacgatg", 0, 0, 1).first.to_s)
+ assert_equal("0:7:0:1:0:tacgatg", @seq.patscan("Atacgatg", 0, 0, 1).first.to_s)
end
def test_BackTrack_patscan_ins_right_is_ok
- assert_equal("13:7:tgcacgg", @seq.patscan("tgcacggA", 0, 0, 1).first.to_s)
+ assert_equal("13:7:0:1:0:tgcacgg", @seq.patscan("tgcacggA", 0, 0, 1).first.to_s)
end
def test_BackTrack_patscan_del_left_is_ok
- assert_equal("0:7:tacgatg", @seq.patscan("acgatg", 0, 0, 0, 1).first.to_s)
+ assert_equal("0:7:0:0:1:tacgatg", @seq.patscan("acgatg", 0, 0, 0, 1).first.to_s)
end
def test_BackTrack_patscan_del_right_is_ok
- assert_equal("12:8:atgcacgg", @seq.patscan("tgcacgg", 0, 0, 0, 1).first.to_s)
+ assert_equal("12:8:0:0:1:atgcacgg", @seq.patscan("tgcacgg", 0, 0, 0, 1).first.to_s)
end
def test_BackTrack_patscan_ambiguity_mis_ins_del_all_ok
- assert_equal("0:20:tacgatgctagcatgcacgg", @seq.patscan("tacatgcNagGatgcCacgg", 0, 1, 1, 1).first.to_s)
+ assert_equal("0:20:1:1:1:tacgatgctagcatgcacgg", @seq.patscan("tacatgcNagGatgcCacgg", 0, 1, 1, 1).first.to_s)
end
end