From: martinahansen Date: Wed, 2 Jan 2013 09:49:50 +0000 (+0000) Subject: renaming match.rb to matches.rb X-Git-Url: https://git.donarmstrong.com/?p=biopieces.git;a=commitdiff_plain;h=62ccc0892d1ec2497796aa340bf97c663511996f renaming match.rb to matches.rb git-svn-id: http://biopieces.googlecode.com/svn/trunk@2049 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/code_ruby/lib/maasha/align/match.rb b/code_ruby/lib/maasha/align/match.rb deleted file mode 100644 index 17f27e2..0000000 --- a/code_ruby/lib/maasha/align/match.rb +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright (C) 2007-2012 Martin A. Hansen. - -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -# http://www.gnu.org/copyleft/gpl.html - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -# This software is part of the Biopieces framework (www.biopieces.org). - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -# Class containing methods for located all non-redundant Maximally Extended Matches (MEMs) -# between two strings. -class Matches - # Class method to located all non-redudant MEMs bewteen two sequences within a given - # search space and seeded with kmers of a given size. - def self.find(q_seq, s_seq, q_min, s_min, q_max, s_max, kmer) - m = self.new - m.matches_find(q_seq, s_seq, q_min, s_min, q_max, s_max, kmer) - end - - # Method that finds all maximally expanded non-redundant matches shared - # between two sequences inside a given search space. - def matches_find(q_seq, s_seq, q_min, s_min, q_max, s_max, kmer) - matches = [] - redundant = Hash.new { |h, k| h[k] = [] } - - s_index = index_seq(s_seq, s_min, s_max, kmer) - - q_pos = q_min - - while q_pos <= q_max - kmer + 1 - q_oligo = q_seq[q_pos ... q_pos + kmer] - - s_index[q_oligo].each do |s_pos| - match = Match.new(q_pos, s_pos, kmer) - - unless match_redundant?(redundant, match) - match_expand(match, q_seq, s_seq, q_min, s_min, q_max, s_max) - matches << match - - match_redundant_add(redundant, match) - end - end - - q_pos += 1 - end - - matches - end - - # Method that indexes a sequence within a given interval such that the - # index contains all oligos of a given kmer size and the positions where - # this oligo was located. - def index_seq(seq, min, max, kmer, step = 1) - index_hash = Hash.new { |h, k| h[k] = [] } - - pos = min - - while pos <= max - kmer + 1 - oligo = seq[pos ... pos + kmer] - index_hash[oligo] << pos - - pos += step - end - - index_hash - end - - # Method to check if a match is redundant. - def match_redundant?(redundant, match) - redundant[match.q_beg].each do |s_interval| - if s_interval.include? match.s_beg and s_interval.include? match.s_end # TODO test if include? is slow - return true - end - end - - false - end - - # Method that adds a match to the redundancy index. - def match_redundant_add(redundant, match) - (match.q_beg .. match.q_end).each do |q| - redundant[q] << (match.s_beg .. match.s_end) - end - end - - # Method that expands a match as far as possible to the left and right. - def match_expand(match, q_seq, s_seq, q_min, s_min, q_max, s_max) - match_expand_left(match, q_seq, s_seq, q_min, s_min, q_max, s_max) - match_expand_right(match, q_seq, s_seq, q_min, s_min, q_max, s_max) - - match - end - - # Method that expands a match as far as possible to the left. - def match_expand_left(match, q_seq, s_seq, q_min, s_min, q_max, s_max) - while match.q_beg > q_min and - match.s_beg > s_min and - q_seq[match.q_beg - 1] == s_seq[match.s_beg - 1] - match.q_beg -= 1 - match.s_beg -= 1 - match.length += 1 - end - - match - end - - # Method that expands a match as far as possible to the right. - def match_expand_right(match, q_seq, s_seq, q_min, s_min, q_max, s_max) - while match.q_end < q_max and - match.s_end < s_max and - q_seq[match.q_end + 1] == s_seq[match.s_end + 1] - match.length += 1 - end - - match - end - - # Class for containing a match between two sequences q and s. - class Match - attr_accessor :q_beg, :s_beg, :length, :score - - def initialize(q_beg, s_beg, length, score = 0.0) - @q_beg = q_beg - @s_beg = s_beg - @length = length - @score = score - end - - def q_end - @q_beg + @length - 1 - end - - def s_end - @s_beg + @length - 1 - end - - def to_s(seq = nil) - s = "q: #{@q_beg} #{q_end} s: #{@s_beg} #{s_end} l: #{@length} s: #{@score}" - s << " seq: #{seq[@q_beg .. q_end]}" if seq - s - end - end -end diff --git a/code_ruby/lib/maasha/align/matches.rb b/code_ruby/lib/maasha/align/matches.rb new file mode 100644 index 0000000..17f27e2 --- /dev/null +++ b/code_ruby/lib/maasha/align/matches.rb @@ -0,0 +1,158 @@ +# Copyright (C) 2007-2012 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# This software is part of the Biopieces framework (www.biopieces.org). + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Class containing methods for located all non-redundant Maximally Extended Matches (MEMs) +# between two strings. +class Matches + # Class method to located all non-redudant MEMs bewteen two sequences within a given + # search space and seeded with kmers of a given size. + def self.find(q_seq, s_seq, q_min, s_min, q_max, s_max, kmer) + m = self.new + m.matches_find(q_seq, s_seq, q_min, s_min, q_max, s_max, kmer) + end + + # Method that finds all maximally expanded non-redundant matches shared + # between two sequences inside a given search space. + def matches_find(q_seq, s_seq, q_min, s_min, q_max, s_max, kmer) + matches = [] + redundant = Hash.new { |h, k| h[k] = [] } + + s_index = index_seq(s_seq, s_min, s_max, kmer) + + q_pos = q_min + + while q_pos <= q_max - kmer + 1 + q_oligo = q_seq[q_pos ... q_pos + kmer] + + s_index[q_oligo].each do |s_pos| + match = Match.new(q_pos, s_pos, kmer) + + unless match_redundant?(redundant, match) + match_expand(match, q_seq, s_seq, q_min, s_min, q_max, s_max) + matches << match + + match_redundant_add(redundant, match) + end + end + + q_pos += 1 + end + + matches + end + + # Method that indexes a sequence within a given interval such that the + # index contains all oligos of a given kmer size and the positions where + # this oligo was located. + def index_seq(seq, min, max, kmer, step = 1) + index_hash = Hash.new { |h, k| h[k] = [] } + + pos = min + + while pos <= max - kmer + 1 + oligo = seq[pos ... pos + kmer] + index_hash[oligo] << pos + + pos += step + end + + index_hash + end + + # Method to check if a match is redundant. + def match_redundant?(redundant, match) + redundant[match.q_beg].each do |s_interval| + if s_interval.include? match.s_beg and s_interval.include? match.s_end # TODO test if include? is slow + return true + end + end + + false + end + + # Method that adds a match to the redundancy index. + def match_redundant_add(redundant, match) + (match.q_beg .. match.q_end).each do |q| + redundant[q] << (match.s_beg .. match.s_end) + end + end + + # Method that expands a match as far as possible to the left and right. + def match_expand(match, q_seq, s_seq, q_min, s_min, q_max, s_max) + match_expand_left(match, q_seq, s_seq, q_min, s_min, q_max, s_max) + match_expand_right(match, q_seq, s_seq, q_min, s_min, q_max, s_max) + + match + end + + # Method that expands a match as far as possible to the left. + def match_expand_left(match, q_seq, s_seq, q_min, s_min, q_max, s_max) + while match.q_beg > q_min and + match.s_beg > s_min and + q_seq[match.q_beg - 1] == s_seq[match.s_beg - 1] + match.q_beg -= 1 + match.s_beg -= 1 + match.length += 1 + end + + match + end + + # Method that expands a match as far as possible to the right. + def match_expand_right(match, q_seq, s_seq, q_min, s_min, q_max, s_max) + while match.q_end < q_max and + match.s_end < s_max and + q_seq[match.q_end + 1] == s_seq[match.s_end + 1] + match.length += 1 + end + + match + end + + # Class for containing a match between two sequences q and s. + class Match + attr_accessor :q_beg, :s_beg, :length, :score + + def initialize(q_beg, s_beg, length, score = 0.0) + @q_beg = q_beg + @s_beg = s_beg + @length = length + @score = score + end + + def q_end + @q_beg + @length - 1 + end + + def s_end + @s_beg + @length - 1 + end + + def to_s(seq = nil) + s = "q: #{@q_beg} #{q_end} s: #{@s_beg} #{s_end} l: #{@length} s: #{@score}" + s << " seq: #{seq[@q_beg .. q_end]}" if seq + s + end + end +end