X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=code_ruby%2Flib%2Fmaasha%2Falign%2Fmatches.rb;h=009db07cbc225f08a2df4690ade97bb4e557de87;hb=e95fec9ae319e4da65e7dd5700abf7b4a036b411;hp=17f27e2ee982a314125dffe5fdf497e2fac2c39d;hpb=3c7f9678a0e3344d809487e5e4af048570b50e26;p=biopieces.git diff --git a/code_ruby/lib/maasha/align/matches.rb b/code_ruby/lib/maasha/align/matches.rb index 17f27e2..009db07 100644 --- a/code_ruby/lib/maasha/align/matches.rb +++ b/code_ruby/lib/maasha/align/matches.rb @@ -22,6 +22,8 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +require 'maasha/align/match' + # Class containing methods for located all non-redundant Maximally Extended Matches (MEMs) # between two strings. class Matches @@ -36,7 +38,7 @@ class Matches # between two sequences inside a given search space. def matches_find(q_seq, s_seq, q_min, s_min, q_max, s_max, kmer) matches = [] - redundant = Hash.new { |h, k| h[k] = [] } + redundant = {} s_index = index_seq(s_seq, s_min, s_max, kmer) @@ -48,11 +50,13 @@ class Matches s_index[q_oligo].each do |s_pos| match = Match.new(q_pos, s_pos, kmer) - unless match_redundant?(redundant, match) - match_expand(match, q_seq, s_seq, q_min, s_min, q_max, s_max) + unless redundant[match.q_beg * 1_000_000_000 + match.s_beg] + match.expand(q_seq, s_seq, q_min, s_min, q_max, s_max) matches << match - match_redundant_add(redundant, match) + (0 ... match.length).each do |j| + redundant[(match.q_beg + j) * 1_000_000_000 + match.s_beg + j] = true + end end end @@ -79,80 +83,4 @@ class Matches index_hash end - - # Method to check if a match is redundant. - def match_redundant?(redundant, match) - redundant[match.q_beg].each do |s_interval| - if s_interval.include? match.s_beg and s_interval.include? match.s_end # TODO test if include? is slow - return true - end - end - - false - end - - # Method that adds a match to the redundancy index. - def match_redundant_add(redundant, match) - (match.q_beg .. match.q_end).each do |q| - redundant[q] << (match.s_beg .. match.s_end) - end - end - - # Method that expands a match as far as possible to the left and right. - def match_expand(match, q_seq, s_seq, q_min, s_min, q_max, s_max) - match_expand_left(match, q_seq, s_seq, q_min, s_min, q_max, s_max) - match_expand_right(match, q_seq, s_seq, q_min, s_min, q_max, s_max) - - match - end - - # Method that expands a match as far as possible to the left. - def match_expand_left(match, q_seq, s_seq, q_min, s_min, q_max, s_max) - while match.q_beg > q_min and - match.s_beg > s_min and - q_seq[match.q_beg - 1] == s_seq[match.s_beg - 1] - match.q_beg -= 1 - match.s_beg -= 1 - match.length += 1 - end - - match - end - - # Method that expands a match as far as possible to the right. - def match_expand_right(match, q_seq, s_seq, q_min, s_min, q_max, s_max) - while match.q_end < q_max and - match.s_end < s_max and - q_seq[match.q_end + 1] == s_seq[match.s_end + 1] - match.length += 1 - end - - match - end - - # Class for containing a match between two sequences q and s. - class Match - attr_accessor :q_beg, :s_beg, :length, :score - - def initialize(q_beg, s_beg, length, score = 0.0) - @q_beg = q_beg - @s_beg = s_beg - @length = length - @score = score - end - - def q_end - @q_beg + @length - 1 - end - - def s_end - @s_beg + @length - 1 - end - - def to_s(seq = nil) - s = "q: #{@q_beg} #{q_end} s: #{@s_beg} #{s_end} l: #{@length} s: #{@score}" - s << " seq: #{seq[@q_beg .. q_end]}" if seq - s - end - end end