From ee4553e119dd4a9b0e55baa8b5d5393adae235f5 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Tue, 11 Sep 2012 09:54:08 +0000 Subject: [PATCH] worked on pair.rb git-svn-id: http://biopieces.googlecode.com/svn/trunk@1919 74ccb610-7750-0410-82ae-013aeee3265d --- code_ruby/lib/maasha/align/pair.rb | 129 +++++++++++++++++++---------- 1 file changed, 87 insertions(+), 42 deletions(-) diff --git a/code_ruby/lib/maasha/align/pair.rb b/code_ruby/lib/maasha/align/pair.rb index 72a5eae..c948673 100644 --- a/code_ruby/lib/maasha/align/pair.rb +++ b/code_ruby/lib/maasha/align/pair.rb @@ -22,6 +22,12 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +require 'maasha/align/match' +require 'maasha/math_aux' + +FACTOR_SCORE_LENGTH = 1.0 +FACTOR_SCORE_DIAG = -1.42 + # Module with stuff to create a pairwise aligment. module PairAlign # Class for creating a pairwise alignment. @@ -40,7 +46,10 @@ module PairAlign @q_entry.seq.downcase! @s_entry.seq.downcase! - align_recurse(@q_entry.seq, @s_entry.seq, 0, 0, @q_entry.length - 1, @s_entry.length - 1, 16, []) + space = Space.new( 0, 0, @q_entry.length - 1, @s_entry.length - 1) + kmer = 16 + + align_recurse(@q_entry.seq, @s_entry.seq, space, kmer) matches_upcase gaps_insert end @@ -54,15 +63,15 @@ module PairAlign # depending on a calculated score. New search spaces spanning the spaces # between the best scoring matches and the search space boundaries will be # cast and recursed into. - def align_recurse(q_seq, s_seq, q_min, s_min, q_max, s_max, kmer, matches) - matches = matches_select_by_space(matches, q_min, s_min, q_max, s_max) + def align_recurse(q_seq, s_seq, space, kmer, matches = []) + matches = matches_select_by_space(matches, space) while (matches.size == 0 and kmer > 0) - matches = matches_find(q_seq, s_seq, q_min, s_min, q_max, s_max, kmer) + matches = Matches.find(q_seq, s_seq, space.q_min, space.s_min, space.q_max, space.s_max, kmer) kmer /= 2 end - matches_score(matches, q_min, s_min, q_max, s_max) + matches_score(matches, space) # matches.each { |m| puts m.to_s(q_seq) } @@ -73,69 +82,77 @@ module PairAlign if best_match = matches.pop @matches << best_match - l_q_min = q_min - l_s_min = s_min - l_q_max = best_match.q_beg - 1 - l_s_max = best_match.s_beg - 1 - - r_q_min = best_match.q_end + 1 - r_s_min = best_match.s_end + 1 - r_q_max = q_max - r_s_max = s_max - - if l_q_max - l_q_min > 0 and l_s_max - l_s_min > 0 - align_recurse(q_seq, s_seq, l_q_min, l_s_min, l_q_max, l_s_max, kmer, matches) - end + space_left = Space.new(space.q_min, space.s_min, best_match.q_beg - 1, best_match.s_beg - 1) + space_right = Space.new(best_match.q_end + 1, best_match.s_end + 1, space.q_max, space.s_max) - if r_q_max - r_q_min > 0 and r_s_max - r_s_min > 0 - align_recurse(q_seq, s_seq, r_q_min, r_s_min, r_q_max, r_s_max, kmer, matches) - end + align_recurse(q_seq, s_seq, space_left, kmer, matches) unless space_left.empty? + align_recurse(q_seq, s_seq, space_right, kmer, matches) unless space_right.empty? end end # Method to select matches that lies within the search space. - def matches_select_by_space(matches, q_min, s_min, q_max, s_max) + def matches_select_by_space(matches, space) new_matches = matches.select do |match| - match.q_beg >= q_min and - match.s_beg >= s_min and - match.q_end <= q_max and - match.s_end <= s_max + match.q_beg >= space.q_min and + match.s_beg >= space.s_min and + match.q_end <= space.q_max and + match.s_end <= space.s_max end new_matches end - def matches_score(matches, q_min, s_min, q_max, s_max) + def matches_score(matches, space) matches.each do |match| score_length = match_score_length(match) - score_diag = match_score_diag(match, q_min, s_min, q_max, s_max) - score_corner = match_score_corner(match, q_min, s_min, q_max, s_max) - + score_diag = match_score_diag(match, space) - match.score = score_length - score_diag - score_corner - puts match - puts "score_length: #{score_length} score_diag: #{score_diag} score_corner: #{score_corner} score: #{match.score}" + match.score = score_length + score_diag end matches.sort_by! { |match| match.score } end def match_score_length(match) - match.length + match.length * FACTOR_SCORE_LENGTH end - def match_score_diag(match, q_min, s_min, q_max, s_max) - dist1 = (match.q_beg - match.s_beg).abs - dist2 = ((match.q_end - match.q_end).abs - dist1).abs + def match_score_diag(match, space) + if space.q_dim >= space.s_dim # s_dim is the narrow end + dist_beg = Math.dist_point2line(match.q_beg, + match.s_beg, + space.q_min, + space.s_min, + space.q_min + space.s_dim, + space.s_min + space.s_dim) + + dist_end = Math.dist_point2line( match.q_beg, + match.s_beg, + space.q_max - space.s_dim, + space.s_max - space.s_dim, + space.q_max, + space.s_max) + else + dist_beg = Math.dist_point2line( match.q_beg, + match.s_beg, + space.q_min, + space.s_min, + space.q_min + space.q_dim, + space.s_min + space.q_dim) + + dist_end = Math.dist_point2line( match.q_beg, + match.s_beg, + space.q_max - space.q_dim, + space.s_max - space.q_dim, + space.q_max, + space.s_max) + end - dist1 < dist2 ? dist1 : dist2 - end + dist_min = dist_beg < dist_end ? dist_beg : dist_end - def match_score_corner(match, q_min, s_min, q_max, s_max) - 1 + dist_min * FACTOR_SCORE_DIAG end - # Method that finds all maximally expanded non-redundant matches shared # between two sequences inside a given search space. def matches_find(q_seq, s_seq, q_min, s_min, q_max, s_max, kmer) @@ -284,6 +301,34 @@ module PairAlign end end + # Class for containing a search space between two sequences q and s. + class Space + attr_reader :q_min, :s_min, :q_max, :s_max + + def initialize(q_min, s_min, q_max, s_max) + @q_min = q_min + @s_min = s_min + @q_max = q_max + @s_max = s_max + end + + def q_dim + @q_max - @q_min + 1 + end + + def s_dim + @s_max - @s_min + 1 + end + + def empty? + if @q_max - @q_min > 0 and @s_max - @s_min > 0 + return false + end + + true + end + end + # Class for containing a match between two sequences q and s. class Match attr_accessor :q_beg, :s_beg, :length, :score -- 2.39.5