From 38c892c7778ac261d0e233c8881e27e2c7afbb94 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Tue, 27 Nov 2012 20:52:44 +0000 Subject: [PATCH] polished backtrack.rb git-svn-id: http://biopieces.googlecode.com/svn/trunk@2006 74ccb610-7750-0410-82ae-013aeee3265d --- code_ruby/lib/maasha/seq/backtrack.rb | 105 +++++++++++++++----------- 1 file changed, 63 insertions(+), 42 deletions(-) diff --git a/code_ruby/lib/maasha/seq/backtrack.rb b/code_ruby/lib/maasha/seq/backtrack.rb index d97fe87..088b596 100644 --- a/code_ruby/lib/maasha/seq/backtrack.rb +++ b/code_ruby/lib/maasha/seq/backtrack.rb @@ -65,12 +65,12 @@ module BackTrack matches = [] if block_given? - while result = self.track(pattern, offset, max_mismatches, max_insertions, max_deletions) + while result = track_C(self.seq, self.length, pattern, offset, max_mismatches, max_insertions, max_deletions) yield Match.new(result.first, result.last, self.seq[result.first ... result.first + result.last]) offset = result.first + 1 end else - while result = self.track(pattern, offset, max_mismatches, max_insertions, max_deletions) + while result = track_C(self.seq, self.length, pattern, offset, max_mismatches, max_insertions, max_deletions) matches << Match.new(result.first, result.last, self.seq[result.first ... result.first + result.last]) offset = result.first + 1 end @@ -86,14 +86,14 @@ module BackTrack # Macro for matching nucleotides including ambiguity codes. builder.prefix %{ - #define MATCH(A,B) ((bitmap[(unsigned short) A] & bitmap[(unsigned short) B]) != 0) + #define MATCH(A,B) ((bitmap[A] & bitmap[B]) != 0) } # Bitmap for matching nucleotides including ambiguity codes. # For each value bits are set from the left: bit pos 1 for A, # bit pos 2 for T, bit pos 3 for C, and bit pos 4 for G. builder.prefix %{ - unsigned short bitmap[256] = { + char bitmap[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -113,60 +113,81 @@ module BackTrack }; } - # Backtrack algorithm for matching a pattern (p) starting in a sequence (s) allowing for mm + # Backtrack algorithm for matching a pattern (p) starting in a sequence (s) allowing for mis # mismatches, ins insertions and del deletions. ss is the start of the sequence, used only for # reporting the match endpoints. builder.prefix %{ - unsigned int backtrack(char* ss, char* s, char* p, int mm, int ins, int del) + unsigned int backtrack( + char *ss, // Sequence start + char *s, // Sequence + char *p, // Pattern + int mis, // Max mismatches + int ins, // Max insertions + int del // Max deletions + ) { - unsigned int r = 0; + unsigned int r = 0; - while (*s && MATCH(*s, *p)) ++s, ++p; // OK to always match longest segment + while (*s && MATCH(*s, *p)) ++s, ++p; // OK to always match longest segment - if (!*p) - return (unsigned int) (s - ss); - else - { - if (mm && *s && *p && (r = backtrack(ss, s + 1, p + 1, mm - 1, ins, del))) return r; - if (ins && *p && (r = backtrack(ss, s, p + 1, mm, ins - 1, del))) return r; - if (del && *s && (r = backtrack(ss, s + 1, p, mm, ins, del - 1))) return r; - } + if (!*p) + return (unsigned int) (s - ss); + else + { + if (mis && *s && *p && (r = backtrack(ss, s + 1, p + 1, mis - 1, ins, del))) return r; + if (ins && *p && (r = backtrack(ss, s, p + 1, mis, ins - 1, del))) return r; + if (del && *s && (r = backtrack(ss, s + 1, p, mis, ins, del - 1))) return r; + } - return 0; + return 0; } } - # Find pattern (p) in a sequence (@seq) starting at pos, with at most mm mismatches, ins + # Find pattern (p) in a sequence (s) starting at pos, with at most mis mismatches, ins # insertions and del deletions. builder.c %{ - static VALUE track(char* p, unsigned int pos, int mm, int ins, int del) + VALUE track_C( + VALUE _s, // Sequence + VALUE _len, // Sequence length + VALUE _p, // Pattern + VALUE _pos, // Start position + VALUE _mis, // Maximum mismatches + VALUE _ins, // Maximum insertions + VALUE _del // Maximum deletions + ) { - VALUE seq = rb_ivar_get(self, id_seq); - char* s = StringValuePtr(seq); - char* ss = s; - unsigned int e; - VALUE tuple; - - if (pos < strlen(s)) + char *s = StringValuePtr(_s); + char *p = StringValuePtr(_p); + unsigned int len = FIX2UINT(_len); + unsigned int pos = FIX2UINT(_pos); + unsigned int mis = FIX2UINT(_mis); + unsigned int ins = FIX2UINT(_ins); + unsigned int del = FIX2UINT(_del); + + char *ss = s; + unsigned int e = 0; + VALUE tuple; + + if (pos < len) + { + s += pos; + + while (*s) { - s += pos; - - while (*s) - { - if ((e = backtrack(ss, s, p, mm, ins, del))) - { - tuple = rb_ary_new(); - rb_ary_push(tuple, INT2FIX((int) (s - ss))); - rb_ary_push(tuple, INT2FIX((int) e - (s - ss))); - return tuple; - } - - s++; - } + if ((e = backtrack(ss, s, p, mis, ins, del))) + { + tuple = rb_ary_new(); + rb_ary_push(tuple, INT2FIX((int) (s - ss))); + rb_ary_push(tuple, INT2FIX((int) e - (s - ss))); + return tuple; + } + + s++; } + } - return Qnil; - } + return Qnil; + } } end -- 2.39.5