From 5e95615675184f4f73ee15fa1bace9e55426d8ba Mon Sep 17 00:00:00 2001 From: martinahansen Date: Wed, 28 Nov 2012 10:35:46 +0000 Subject: [PATCH 1/1] added output of mis ind and del to backtrack code git-svn-id: http://biopieces.googlecode.com/svn/trunk@2007 74ccb610-7750-0410-82ae-013aeee3265d --- code_ruby/lib/maasha/seq/backtrack.rb | 133 ++++++++++++-------- code_ruby/test/maasha/seq/test_backtrack.rb | 24 ++-- 2 files changed, 95 insertions(+), 62 deletions(-) diff --git a/code_ruby/lib/maasha/seq/backtrack.rb b/code_ruby/lib/maasha/seq/backtrack.rb index 088b596..0bb5789 100644 --- a/code_ruby/lib/maasha/seq/backtrack.rb +++ b/code_ruby/lib/maasha/seq/backtrack.rb @@ -64,16 +64,15 @@ module BackTrack matches = [] - if block_given? - while result = track_C(self.seq, self.length, pattern, offset, max_mismatches, max_insertions, max_deletions) - yield Match.new(result.first, result.last, self.seq[result.first ... result.first + result.last]) - offset = result.first + 1 - end - else - while result = track_C(self.seq, self.length, pattern, offset, max_mismatches, max_insertions, max_deletions) - matches << Match.new(result.first, result.last, self.seq[result.first ... result.first + result.last]) - offset = result.first + 1 + while result = track_C(self.seq, self.length, pattern, offset, max_mismatches, max_insertions, max_deletions) + match = Match.new(result[0], result[1], result[2], result[3], result[4], self.seq[result[0] ... result[0] + result[1]]) + + if block_given? + yield match + else + matches << match end + offset = match.pos + 1 end return matches.empty? ? nil : matches unless block_given? @@ -82,13 +81,22 @@ module BackTrack private inline do |builder| - builder.add_static "id_seq", 'rb_intern("@seq")', "ID" - # Macro for matching nucleotides including ambiguity codes. builder.prefix %{ #define MATCH(A,B) ((bitmap[A] & bitmap[B]) != 0) } + # Defining a struct for returning match information. + builder.prefix %{ + typedef struct + { + unsigned int end; + unsigned int mis; + unsigned int ins; + unsigned int del; + } match; + } + # Bitmap for matching nucleotides including ambiguity codes. # For each value bits are set from the left: bit pos 1 for A, # bit pos 2 for T, bit pos 3 for C, and bit pos 4 for G. @@ -117,29 +125,37 @@ module BackTrack # mismatches, ins insertions and del deletions. ss is the start of the sequence, used only for # reporting the match endpoints. builder.prefix %{ - unsigned int backtrack( - char *ss, // Sequence start - char *s, // Sequence - char *p, // Pattern - int mis, // Max mismatches - int ins, // Max insertions - int del // Max deletions + match* backtrack( + char *ss, // Sequence start + char *s, // Sequence + char *p, // Pattern + unsigned int mis, // Max mismatches + unsigned int ins, // Max insertions + unsigned int del // Max deletions ) { - unsigned int r = 0; + match found = {0, 0, 0, 0}; + match *f = &found; while (*s && MATCH(*s, *p)) ++s, ++p; // OK to always match longest segment if (!*p) - return (unsigned int) (s - ss); + { + f->end = (unsigned int) (s - ss); + f->mis = mis; + f->ins = ins; + f->del = del; + + return f; + } else { - if (mis && *s && *p && (r = backtrack(ss, s + 1, p + 1, mis - 1, ins, del))) return r; - if (ins && *p && (r = backtrack(ss, s, p + 1, mis, ins - 1, del))) return r; - if (del && *s && (r = backtrack(ss, s + 1, p, mis, ins, del - 1))) return r; + if (mis && *s && *p && (f = backtrack(ss, s + 1, p + 1, mis - 1, ins, del))) return f; + if (ins && *p && (f = backtrack(ss, s, p + 1, mis, ins - 1, del))) return f; + if (del && *s && (f = backtrack(ss, s + 1, p, mis, ins, del - 1))) return f; } - return 0; + return NULL; } } @@ -147,26 +163,31 @@ module BackTrack # insertions and del deletions. builder.c %{ VALUE track_C( - VALUE _s, // Sequence - VALUE _len, // Sequence length - VALUE _p, // Pattern - VALUE _pos, // Start position - VALUE _mis, // Maximum mismatches - VALUE _ins, // Maximum insertions - VALUE _del // Maximum deletions + VALUE _s, // Sequence + VALUE _len, // Sequence length + VALUE _p, // Pattern + VALUE _pos, // Start position + VALUE _max_mis, // Maximum mismatches + VALUE _max_ins, // Maximum insertions + VALUE _max_del // Maximum deletions ) { - char *s = StringValuePtr(_s); - char *p = StringValuePtr(_p); - unsigned int len = FIX2UINT(_len); - unsigned int pos = FIX2UINT(_pos); - unsigned int mis = FIX2UINT(_mis); - unsigned int ins = FIX2UINT(_ins); - unsigned int del = FIX2UINT(_del); - - char *ss = s; - unsigned int e = 0; - VALUE tuple; + char *s = StringValuePtr(_s); + char *p = StringValuePtr(_p); + unsigned int len = FIX2UINT(_len); + unsigned int pos = FIX2UINT(_pos); + unsigned int max_mis = FIX2UINT(_max_mis); + unsigned int max_ins = FIX2UINT(_max_ins); + unsigned int max_del = FIX2UINT(_max_del); + + char *ss = s; + match *f = NULL; + unsigned int mbeg = 0; + unsigned int mlen = 0; + unsigned int mmis = 0; + unsigned int mins = 0; + unsigned int mdel = 0; + VALUE ary = 0; if (pos < len) { @@ -174,12 +195,21 @@ module BackTrack while (*s) { - if ((e = backtrack(ss, s, p, mis, ins, del))) + if ((f = backtrack(ss, s, p, max_mis, max_ins, max_del))) { - tuple = rb_ary_new(); - rb_ary_push(tuple, INT2FIX((int) (s - ss))); - rb_ary_push(tuple, INT2FIX((int) e - (s - ss))); - return tuple; + mbeg = (s - ss); + mlen = f->end - mbeg; + mmis = max_mis - f->mis; + mins = max_ins - f->ins; + mdel = max_del - f->del; + + ary = rb_ary_new(); + rb_ary_push(ary, INT2FIX(mbeg)); + rb_ary_push(ary, INT2FIX(mlen)); + rb_ary_push(ary, INT2FIX(mmis)); + rb_ary_push(ary, INT2FIX(mins)); + rb_ary_push(ary, INT2FIX(mdel)); + return ary; } s++; @@ -193,16 +223,19 @@ module BackTrack # Class containing match information. class Match - attr_reader :pos, :length, :match + attr_reader :pos, :length, :mis, :ins, :del, :match - def initialize(pos, length, match) + def initialize(pos, length, mis, ins, del, match) @pos = pos @length = length + @mis = mis + @ins = ins + @del = del @match = match end def to_s - "#{pos}:#{length}:#{match}" + "#{@pos}:#{@length}:#{@mis}:#{@ins}:#{@del}:#{@match}" end end end diff --git a/code_ruby/test/maasha/seq/test_backtrack.rb b/code_ruby/test/maasha/seq/test_backtrack.rb index b2bc7f7..43af1de 100644 --- a/code_ruby/test/maasha/seq/test_backtrack.rb +++ b/code_ruby/test/maasha/seq/test_backtrack.rb @@ -96,48 +96,48 @@ class BackTrackTest < Test::Unit::TestCase end def test_BackTrack_patscan_perfect_left_is_ok - assert_equal("0:7:tacgatg", @seq.patscan("TACGATG").first.to_s) + assert_equal("0:7:0:0:0:tacgatg", @seq.patscan("TACGATG").first.to_s) end def test_BackTrack_patscan_perfect_right_is_ok - assert_equal("13:7:tgcacgg", @seq.patscan("TGCACGG").first.to_s) + assert_equal("13:7:0:0:0:tgcacgg", @seq.patscan("TGCACGG").first.to_s) end def test_BackTrack_patscan_ambiguity_is_ok - assert_equal("13:7:tgcacgg", @seq.patscan("TGCACNN").first.to_s) + assert_equal("13:7:0:0:0:tgcacgg", @seq.patscan("TGCACNN").first.to_s) end def test_BackTrack_patscan_pos_is_ok - assert_equal("10:1:g", @seq.patscan("N", 10).first.to_s) - assert_equal("19:1:g", @seq.patscan("N", 10).last.to_s) + assert_equal("10:1:0:0:0:g", @seq.patscan("N", 10).first.to_s) + assert_equal("19:1:0:0:0:g", @seq.patscan("N", 10).last.to_s) end def test_BackTrack_patscan_mis_left_is_ok - assert_equal("0:7:tacgatg", @seq.patscan("Aacgatg", 0, 1).first.to_s) + assert_equal("0:7:1:0:0:tacgatg", @seq.patscan("Aacgatg", 0, 1).first.to_s) end def test_BackTrack_patscan_mis_right_is_ok - assert_equal("13:7:tgcacgg", @seq.patscan("tgcacgA", 0, 1).first.to_s) + assert_equal("13:7:1:0:0:tgcacgg", @seq.patscan("tgcacgA", 0, 1).first.to_s) end def test_BackTrack_patscan_ins_left_is_ok - assert_equal("0:7:tacgatg", @seq.patscan("Atacgatg", 0, 0, 1).first.to_s) + assert_equal("0:7:0:1:0:tacgatg", @seq.patscan("Atacgatg", 0, 0, 1).first.to_s) end def test_BackTrack_patscan_ins_right_is_ok - assert_equal("13:7:tgcacgg", @seq.patscan("tgcacggA", 0, 0, 1).first.to_s) + assert_equal("13:7:0:1:0:tgcacgg", @seq.patscan("tgcacggA", 0, 0, 1).first.to_s) end def test_BackTrack_patscan_del_left_is_ok - assert_equal("0:7:tacgatg", @seq.patscan("acgatg", 0, 0, 0, 1).first.to_s) + assert_equal("0:7:0:0:1:tacgatg", @seq.patscan("acgatg", 0, 0, 0, 1).first.to_s) end def test_BackTrack_patscan_del_right_is_ok - assert_equal("12:8:atgcacgg", @seq.patscan("tgcacgg", 0, 0, 0, 1).first.to_s) + assert_equal("12:8:0:0:1:atgcacgg", @seq.patscan("tgcacgg", 0, 0, 0, 1).first.to_s) end def test_BackTrack_patscan_ambiguity_mis_ins_del_all_ok - assert_equal("0:20:tacgatgctagcatgcacgg", @seq.patscan("tacatgcNagGatgcCacgg", 0, 1, 1, 1).first.to_s) + assert_equal("0:20:1:1:1:tacgatgctagcatgcacgg", @seq.patscan("tacatgcNagGatgcCacgg", 0, 1, 1, 1).first.to_s) end end -- 2.39.2