# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+require 'maasha/align/match'
+require 'maasha/math_aux'
+
+FACTOR_SCORE_LENGTH = 1.0
+FACTOR_SCORE_DIAG = -1.42
+
# Module with stuff to create a pairwise alignment.
module PairAlign
# Class for creating a pairwise alignment.
@q_entry.seq.downcase!
@s_entry.seq.downcase!
- align_recurse(@q_entry.seq, @s_entry.seq, 0, 0, @q_entry.length - 1, @s_entry.length - 1, 16, [])
+ space = Space.new( 0, 0, @q_entry.length - 1, @s_entry.length - 1)
+ kmer = 16
+
+ align_recurse(@q_entry.seq, @s_entry.seq, space, kmer)
matches_upcase
gaps_insert
end
# depending on a calculated score. New search spaces spanning the spaces
# between the best scoring matches and the search space boundaries will be
# cast and recursed into.
- def align_recurse(q_seq, s_seq, q_min, s_min, q_max, s_max, kmer, matches)
- matches = matches_select_by_space(matches, q_min, s_min, q_max, s_max)
+ def align_recurse(q_seq, s_seq, space, kmer, matches = [])
# Re-use the matches handed down by the caller, keeping only those inside space.
+ matches = matches_select_by_space(matches, space)
# Search for fresh matches only when none were inherited. kmer is halved after
# every search, including a successful one, so the recursive calls below
# receive the already-reduced value.
while (matches.size == 0 and kmer > 0)
- matches = matches_find(q_seq, s_seq, q_min, s_min, q_max, s_max, kmer)
+ matches = Matches.find(q_seq, s_seq, space.q_min, space.s_min, space.q_max, space.s_max, kmer)
kmer /= 2
end
# matches_score sorts ascending by score, so the pop below takes the
# highest-scoring match.
- matches_score(matches, q_min, s_min, q_max, s_max)
+ matches_score(matches, space)
# matches.each { |m| puts m.to_s(q_seq) }
# Assignment in the condition is intentional: pop yields nil for an empty Array,
# which ends the recursion for this space.
if best_match = matches.pop
@matches << best_match
- l_q_min = q_min
- l_s_min = s_min
- l_q_max = best_match.q_beg - 1
- l_s_max = best_match.s_beg - 1
-
- r_q_min = best_match.q_end + 1
- r_s_min = best_match.s_end + 1
- r_q_max = q_max
- r_s_max = s_max
-
- if l_q_max - l_q_min > 0 and l_s_max - l_s_min > 0
- align_recurse(q_seq, s_seq, l_q_min, l_s_min, l_q_max, l_s_max, kmer, matches)
- end
# Sub-spaces before the start and after the end of the best match.
+ space_left = Space.new(space.q_min, space.s_min, best_match.q_beg - 1, best_match.s_beg - 1)
+ space_right = Space.new(best_match.q_end + 1, best_match.s_end + 1, space.q_max, space.s_max)
- if r_q_max - r_q_min > 0 and r_s_max - r_s_min > 0
- align_recurse(q_seq, s_seq, r_q_min, r_s_min, r_q_max, r_s_max, kmer, matches)
- end
# The remaining matches are passed on and re-filtered against each sub-space.
+ align_recurse(q_seq, s_seq, space_left, kmer, matches) unless space_left.empty?
+ align_recurse(q_seq, s_seq, space_right, kmer, matches) unless space_right.empty?
end
end
# Method to select matches that lie within the search space.
- def matches_select_by_space(matches, q_min, s_min, q_max, s_max)
+ def matches_select_by_space(matches, space)
# select builds a new collection; the matches argument itself is not modified.
new_matches = matches.select do |match|
- match.q_beg >= q_min and
- match.s_beg >= s_min and
- match.q_end <= q_max and
- match.s_end <= s_max
# A match is kept only when its begin and end lie within the space bounds
# (bounds inclusive) on both the q and the s axis.
+ match.q_beg >= space.q_min and
+ match.s_beg >= space.s_min and
+ match.q_end <= space.q_max and
+ match.s_end <= space.s_max
end
new_matches
end
# Assigns a score to every match and sorts the list in place, ascending by
# score, so the best-scoring match ends up last.
- def matches_score(matches, q_min, s_min, q_max, s_max)
+ def matches_score(matches, space)
matches.each do |match|
score_length = match_score_length(match)
- score_diag = match_score_diag(match, q_min, s_min, q_max, s_max)
- score_corner = match_score_corner(match, q_min, s_min, q_max, s_max)
-
+ score_diag = match_score_diag(match, space)
- match.score = score_length - score_diag - score_corner
- puts match
- puts "score_length: #{score_length} score_diag: #{score_diag} score_corner: #{score_corner} score: #{match.score}"
# The diagonal term is added rather than subtracted because match_score_diag
# already multiplies the distance by FACTOR_SCORE_DIAG, which carries the sign.
+ match.score = score_length + score_diag
end
matches.sort_by! { |match| match.score }
end
# Length component of a match score: the match length weighted by
# FACTOR_SCORE_LENGTH.
def match_score_length(match)
- match.length
+ match.length * FACTOR_SCORE_LENGTH
end
# Diagonal component of a match score: the smaller of two point-to-line
# distances, multiplied by FACTOR_SCORE_DIAG. Both distances are measured from
# the match start (q_beg, s_beg). The first line starts at the (q_min, s_min)
# corner of the space and the second line ends at the (q_max, s_max) corner;
# the other point of each line is offset by the same amount in q and in s,
# that amount being the smaller of space.q_dim and space.s_dim.
# Math.dist_point2line is not defined in this view; its argument order is
# presumably (point q, point s, line q1, line s1, line q2, line s2) - TODO confirm.
- def match_score_diag(match, q_min, s_min, q_max, s_max)
- dist1 = (match.q_beg - match.s_beg).abs
- dist2 = ((match.q_end - match.q_end).abs - dist1).abs
+ def match_score_diag(match, space)
+ if space.q_dim >= space.s_dim # s_dim is the narrow end
+ dist_beg = Math.dist_point2line(match.q_beg,
+ match.s_beg,
+ space.q_min,
+ space.s_min,
+ space.q_min + space.s_dim,
+ space.s_min + space.s_dim)
+
# NOTE(review): dist_end is also measured from (q_beg, s_beg), not from the
# match end point - confirm this is intended.
+ dist_end = Math.dist_point2line( match.q_beg,
+ match.s_beg,
+ space.q_max - space.s_dim,
+ space.s_max - space.s_dim,
+ space.q_max,
+ space.s_max)
+ else
# Same construction as above, with q_dim as the offset because q is the
# narrow end in this branch.
+ dist_beg = Math.dist_point2line( match.q_beg,
+ match.s_beg,
+ space.q_min,
+ space.s_min,
+ space.q_min + space.q_dim,
+ space.s_min + space.q_dim)
+
+ dist_end = Math.dist_point2line( match.q_beg,
+ match.s_beg,
+ space.q_max - space.q_dim,
+ space.s_max - space.q_dim,
+ space.q_max,
+ space.s_max)
+ end
- dist1 < dist2 ? dist1 : dist2
- end
+ dist_min = dist_beg < dist_end ? dist_beg : dist_end
- def match_score_corner(match, q_min, s_min, q_max, s_max)
- 1
+ dist_min * FACTOR_SCORE_DIAG
end
-
# Method that finds all maximally expanded non-redundant matches shared
# between two sequences inside a given search space.
def matches_find(q_seq, s_seq, q_min, s_min, q_max, s_max, kmer)
end
end
+ # Class for containing a search space between two sequences q and s.
+ class Space
+ attr_reader :q_min, :s_min, :q_max, :s_max # the four bounds, readable but not writable
+
+ def initialize(q_min, s_min, q_max, s_max) # stores the four bounds exactly as given
+ @q_min = q_min
+ @s_min = s_min
+ @q_max = q_max
+ @s_max = s_max
+ end
+
+ def q_dim # extent along q, counting both q_min and q_max
+ @q_max - @q_min + 1
+ end
+
+ def s_dim # extent along s, counting both s_min and s_max
+ @s_max - @s_min + 1
+ end
+
+ def empty? # true unless both extents span more than one position
+ if @q_max - @q_min > 0 and @s_max - @s_min > 0 # equivalent to q_dim > 1 and s_dim > 1
+ return false
+ end
+
+ true # NOTE(review): a space whose q_dim or s_dim is exactly 1 is reported as empty - confirm this is intended
+ end
+ end
+
# Class for containing a match between two sequences q and s.
class Match
attr_accessor :q_beg, :s_beg, :length, :score