From e95fec9ae319e4da65e7dd5700abf7b4a036b411 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Thu, 3 Jan 2013 15:15:12 +0000 Subject: [PATCH] polished pairwise alignment code git-svn-id: http://biopieces.googlecode.com/svn/trunk@2053 74ccb610-7750-0410-82ae-013aeee3265d --- code_ruby/lib/maasha/align/match.rb | 13 ++++ code_ruby/lib/maasha/align/matches.rb | 88 +++------------------------ code_ruby/lib/maasha/align/pair.rb | 3 +- 3 files changed, 23 insertions(+), 81 deletions(-) diff --git a/code_ruby/lib/maasha/align/match.rb b/code_ruby/lib/maasha/align/match.rb index d6a8008..784a2d1 100644 --- a/code_ruby/lib/maasha/align/match.rb +++ b/code_ruby/lib/maasha/align/match.rb @@ -23,6 +23,7 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< require 'inline' +require 'maasha/math_aux' # Class for describing a match between two sequences q and s. class Match @@ -57,6 +58,18 @@ class Match self end + def redundant?(matches) + # Speed-up with binary search + + matches.each do |m| + if Math.dist_point2line(self.q_beg, self.s_beg, m.q_beg, m.s_beg, m.q_end, m.s_end) == 0 + return true + end + end + + false + end + def to_s(seq = nil) s = "q: #{@q_beg}, s: #{@s_beg}, l: #{@length}, s: #{@score}" s << " #{seq[@q_beg .. q_end]}" if seq diff --git a/code_ruby/lib/maasha/align/matches.rb b/code_ruby/lib/maasha/align/matches.rb index 17f27e2..009db07 100644 --- a/code_ruby/lib/maasha/align/matches.rb +++ b/code_ruby/lib/maasha/align/matches.rb @@ -22,6 +22,8 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +require 'maasha/align/match' + # Class containing methods for located all non-redundant Maximally Extended Matches (MEMs) # between two strings. class Matches @@ -36,7 +38,7 @@ class Matches # between two sequences inside a given search space. def matches_find(q_seq, s_seq, q_min, s_min, q_max, s_max, kmer) matches = [] - redundant = Hash.new { |h, k| h[k] = [] } + redundant = {} s_index = index_seq(s_seq, s_min, s_max, kmer) @@ -48,11 +50,13 @@ class Matches s_index[q_oligo].each do |s_pos| match = Match.new(q_pos, s_pos, kmer) - unless match_redundant?(redundant, match) - match_expand(match, q_seq, s_seq, q_min, s_min, q_max, s_max) + unless redundant[match.q_beg * 1_000_000_000 + match.s_beg] + match.expand(q_seq, s_seq, q_min, s_min, q_max, s_max) matches << match - match_redundant_add(redundant, match) + (0 ... match.length).each do |j| + redundant[(match.q_beg + j) * 1_000_000_000 + match.s_beg + j] = true + end end end @@ -79,80 +83,4 @@ class Matches index_hash end - - # Method to check if a match is redundant. - def match_redundant?(redundant, match) - redundant[match.q_beg].each do |s_interval| - if s_interval.include? match.s_beg and s_interval.include? match.s_end # TODO test if include? is slow - return true - end - end - - false - end - - # Method that adds a match to the redundancy index. - def match_redundant_add(redundant, match) - (match.q_beg .. match.q_end).each do |q| - redundant[q] << (match.s_beg .. match.s_end) - end - end - - # Method that expands a match as far as possible to the left and right. - def match_expand(match, q_seq, s_seq, q_min, s_min, q_max, s_max) - match_expand_left(match, q_seq, s_seq, q_min, s_min, q_max, s_max) - match_expand_right(match, q_seq, s_seq, q_min, s_min, q_max, s_max) - - match - end - - # Method that expands a match as far as possible to the left. - def match_expand_left(match, q_seq, s_seq, q_min, s_min, q_max, s_max) - while match.q_beg > q_min and - match.s_beg > s_min and - q_seq[match.q_beg - 1] == s_seq[match.s_beg - 1] - match.q_beg -= 1 - match.s_beg -= 1 - match.length += 1 - end - - match - end - - # Method that expands a match as far as possible to the right. - def match_expand_right(match, q_seq, s_seq, q_min, s_min, q_max, s_max) - while match.q_end < q_max and - match.s_end < s_max and - q_seq[match.q_end + 1] == s_seq[match.s_end + 1] - match.length += 1 - end - - match - end - - # Class for containing a match between two sequences q and s. - class Match - attr_accessor :q_beg, :s_beg, :length, :score - - def initialize(q_beg, s_beg, length, score = 0.0) - @q_beg = q_beg - @s_beg = s_beg - @length = length - @score = score - end - - def q_end - @q_beg + @length - 1 - end - - def s_end - @s_beg + @length - 1 - end - - def to_s(seq = nil) - s = "q: #{@q_beg} #{q_end} s: #{@s_beg} #{s_end} l: #{@length} s: #{@score}" - s << " seq: #{seq[@q_beg .. q_end]}" if seq - s - end - end end diff --git a/code_ruby/lib/maasha/align/pair.rb b/code_ruby/lib/maasha/align/pair.rb index 2c8c523..8bcbf25 100644 --- a/code_ruby/lib/maasha/align/pair.rb +++ b/code_ruby/lib/maasha/align/pair.rb @@ -22,7 +22,7 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -require 'maasha/align/match' +require 'maasha/align/matches' require 'maasha/math_aux' FACTOR_SCORE_LENGTH = 1.0 @@ -69,6 +69,7 @@ module PairAlign while (matches.size == 0 and kmer > 0) matches = Matches.find(q_seq, s_seq, space.q_min, space.s_min, space.q_max, space.s_max, kmer) + #matches = Mem.find(q_seq, s_seq, kmer, space.q_min, space.s_min, space.q_max, space.s_max) if @matches.empty? matches.sort_by! { |m| m.length } -- 2.39.2