1 # Copyright (C) 2007-2012 Martin A. Hansen.
3 # This program is free software; you can redistribute it and/or
4 # modify it under the terms of the GNU General Public License
5 # as published by the Free Software Foundation; either version 2
6 # of the License, or (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 # http://www.gnu.org/copyleft/gpl.html
19 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
21 # This software is part of the Biopieces framework (www.biopieces.org).
23 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
25 # Class containing methods for locating all Maximally Extended Matches (MEMs) between
28 # Class method to locate all MEMs between two sequences within a given search
29 # space and seed with kmers of a given size.
30 def self.find(q_seq, s_seq, q_min, s_min, q_max, s_max, kmer)
32 m.matches_find(q_seq, s_seq, q_min, s_min, q_max, s_max, kmer)
35 # Method that finds all maximally expanded non-redundant matches shared
36 # between two sequences inside a given search space.
37 def matches_find(q_seq, s_seq, q_min, s_min, q_max, s_max, kmer)
39 redundant = Hash.new { |h, k| h[k] = [] }
41 s_index = index_seq(s_seq, s_min, s_max, kmer)
45 while q_pos <= q_max - kmer + 1
46 q_oligo = q_seq[q_pos ... q_pos + kmer]
48 s_index[q_oligo].each do |s_pos|
49 match = Match.new(q_pos, s_pos, kmer)
50 unless match_redundant?(redundant, match)
51 match_expand(match, q_seq, s_seq, q_min, s_min, q_max, s_max)
54 match_redundant_add(redundant, match)
64 # Method that indexes a sequence within a given interval such that the
65 # index contains all oligos of a given kmer size and the positions where
66 # each oligo was located.
67 def index_seq(seq, min, max, kmer, step = 1)
68 index_hash = Hash.new { |h, k| h[k] = [] }
72 while pos <= max - kmer + 1
73 oligo = seq[pos ... pos + kmer]
74 index_hash[oligo] << pos
82 # Method to check if a match is redundant.
83 def match_redundant?(redundant, match)
84 redundant[match.q_beg].each do |s_interval|
85 if s_interval.include? match.s_beg and s_interval.include? match.s_end
93 # Method that adds a match to the redundancy index.
94 def match_redundant_add(redundant, match)
95 (match.q_beg .. match.q_end).each do |q|
96 redundant[q] << (match.s_beg .. match.s_end)
100 # Method that expands a match as far as possible to the left and right.
101 def match_expand(match, q_seq, s_seq, q_min, s_min, q_max, s_max)
102 match_expand_left(match, q_seq, s_seq, q_min, s_min, q_max, s_max)
103 match_expand_right(match, q_seq, s_seq, q_min, s_min, q_max, s_max)
108 # Method that expands a match as far as possible to the left.
109 def match_expand_left(match, q_seq, s_seq, q_min, s_min, q_max, s_max)
110 while match.q_beg > q_min and
111 match.s_beg > s_min and
112 q_seq[match.q_beg - 1] == s_seq[match.s_beg - 1]
121 # Method that expands a match as far as possible to the right.
122 def match_expand_right(match, q_seq, s_seq, q_min, s_min, q_max, s_max)
123 while match.q_end < q_max and
124 match.s_end < s_max and
125 q_seq[match.q_end + 1] == s_seq[match.s_end + 1]
132 # Class for containing a match between two sequences q and s.
134 attr_accessor :q_beg, :s_beg, :length, :score
136 def initialize(q_beg, s_beg, length, score = 0.0)
152 s = "q: #{@q_beg} #{q_end} s: #{@s_beg} #{s_end} l: #{@length} s: #{@score}"
153 s << " seq: #{seq[@q_beg .. q_end]}" if seq