--- /dev/null
+# Copyright (C) 2007-2012 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This software is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+class Matches
+ def self.find(q_seq, s_seq, q_min, s_min, q_max, s_max, kmer)
+ m = self.new
+ m.matches_find(q_seq, s_seq, q_min, s_min, q_max, s_max, kmer)
+ end
+
+ # Method that finds all maximally expanded non-redundant matches shared
+ # between two sequences inside a given search space.
+ def matches_find(q_seq, s_seq, q_min, s_min, q_max, s_max, kmer)
+ matches = []
+ redundant = Hash.new { |h, k| h[k] = [] }
+
+ s_index = index_seq(s_seq, s_min, s_max, kmer)
+
+ q_pos = q_min
+
+ while q_pos <= q_max - kmer + 1
+ q_oligo = q_seq[q_pos ... q_pos + kmer]
+
+ s_index[q_oligo].each do |s_pos|
+ match = Match.new(q_pos, s_pos, kmer)
+ unless match_redundant?(redundant, match)
+ match_expand(match, q_seq, s_seq, q_min, s_min, q_max, s_max)
+ matches << match
+
+ match_redundant_add(redundant, match)
+ end
+ end
+
+ q_pos += 1
+ end
+
+ matches
+ end
+
+ # Method that indexes a sequence within a given interval such that the
+ # index contains all oligos of a given kmer size and the positions where
+ # this oligo was located.
+ def index_seq(seq, min, max, kmer)
+ index_hash = Hash.new { |h, k| h[k] = [] }
+
+ pos = min
+
+ while pos <= max - kmer + 1
+ oligo = seq[pos ... pos + kmer]
+ index_hash[oligo] << pos
+
+ pos += 1
+ end
+
+ index_hash
+ end
+
+ # Method to check if a match is redundant.
+ def match_redundant?(redundant, match)
+ redundant[match.q_beg].each do |s_interval|
+ if s_interval.include? match.s_beg and s_interval.include? match.s_end
+ return true
+ end
+ end
+
+ false
+ end
+
+ # Method that adds a match to the redundancy index.
+ def match_redundant_add(redundant, match)
+ (match.q_beg .. match.q_end).each do |q|
+ redundant[q] << (match.s_beg .. match.s_end)
+ end
+ end
+
+ # Method that expands a match as far as possible to the left and right.
+ def match_expand(match, q_seq, s_seq, q_min, s_min, q_max, s_max)
+ match_expand_left(match, q_seq, s_seq, q_min, s_min, q_max, s_max)
+ match_expand_right(match, q_seq, s_seq, q_min, s_min, q_max, s_max)
+
+ match
+ end
+
+ # Method that expands a match as far as possible to the left.
+ def match_expand_left(match, q_seq, s_seq, q_min, s_min, q_max, s_max)
+ while match.q_beg > q_min and
+ match.s_beg > s_min and
+ q_seq[match.q_beg - 1] == s_seq[match.s_beg - 1]
+ match.q_beg -= 1
+ match.s_beg -= 1
+ match.length += 1
+ end
+
+ match
+ end
+
+ # Method that expands a match as far as possible to the right.
+ def match_expand_right(match, q_seq, s_seq, q_min, s_min, q_max, s_max)
+ while match.q_end < q_max and
+ match.s_end < s_max and
+ q_seq[match.q_end + 1] == s_seq[match.s_end + 1]
+ match.length += 1
+ end
+
+ match
+ end
+
+ # Class for containing a match between two sequences q and s.
+ class Match
+ attr_accessor :q_beg, :s_beg, :length, :score
+
+ def initialize(q_beg, s_beg, length, score = 0.0)
+ @q_beg = q_beg
+ @s_beg = s_beg
+ @length = length
+ @score = score
+ end
+
+ def q_end
+ @q_beg + @length - 1
+ end
+
+ def s_end
+ @s_beg + @length - 1
+ end
+
+ def to_s(seq = nil)
+ s = "q: #{@q_beg} #{q_end} s: #{@s_beg} #{s_end} l: #{@length} s: #{@score}"
+ s << " seq: #{seq[@q_beg .. q_end]}" if seq
+ s
+ end
+ end
+end