# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-require 'maasha/align/match'
+require 'maasha/align/matches'
require 'maasha/math_aux'
FACTOR_SCORE_LENGTH = 1.0
-FACTOR_SCORE_DIAG = -1.42
+FACTOR_SCORE_DIAG = -1.41
+KMER = 32
# Module with stuff to create a pairwise alignment.
module PairAlign
@q_entry.seq.downcase!
@s_entry.seq.downcase!
- space = Space.new( 0, 0, @q_entry.length - 1, @s_entry.length - 1)
- kmer = 16
+ space = Space.new(0, 0, @q_entry.length - 1, @s_entry.length - 1)
- align_recurse(@q_entry.seq, @s_entry.seq, space, kmer)
+ align_recurse(@q_entry.seq, @s_entry.seq, space, KMER)
matches_upcase
gaps_insert
end
# cast and recursed into.
def align_recurse(q_seq, s_seq, space, kmer, matches = [])
matches = matches_select_by_space(matches, space)
+ matches = matches_select_by_score(matches, space)
while (matches.size == 0 and kmer > 0)
matches = Matches.find(q_seq, s_seq, space.q_min, space.s_min, space.q_max, space.s_max, kmer)
- kmer /= 2
- end
-
- matches_score(matches, space)
+ #matches = Mem.find(q_seq, s_seq, kmer, space.q_min, space.s_min, space.q_max, space.s_max)
-# matches.each { |m| puts m.to_s(q_seq) }
+ if @matches.empty?
+ matches.sort_by! { |m| m.length }
+ else
+ matches = matches_select_by_score(matches, space)
+ end
- unless @matches.empty?
- matches = matches.select { |match| match.score > 0 }
+ kmer /= 2
end
if best_match = matches.pop
new_matches
end
+ # Method to select matches based on score: all matches are scored against the given space and only those scoring above zero are returned.
+ def matches_select_by_score(matches, space)
+ matches_score(matches, space) # NOTE(review): presumably stores a score on each match - its body is not fully visible here; confirm
+
+ matches.select { |match| match.score > 0 } # select returns a new Array; matches scoring zero or less are dropped
+ end
+
def matches_score(matches, space)
matches.each do |match|
score_length = match_score_length(match)
end
def match_score_diag(match, space)
- if space.q_dim >= space.s_dim # s_dim is the narrow end
+ if space.q_dim > space.s_dim # s_dim is the narrow end
dist_beg = Math.dist_point2line(match.q_beg,
match.s_beg,
space.q_min,
dist_min * FACTOR_SCORE_DIAG
end
- # Method that finds all maximally expanded non-redundant matches shared
- # between two sequences inside a given search space.
- def matches_find(q_seq, s_seq, q_min, s_min, q_max, s_max, kmer)
- matches = []
- redundant = Hash.new { |h, k| h[k] = [] }
-
- s_index = index_seq(s_seq, s_min, s_max, kmer)
-
- q_pos = q_min
-
- while q_pos <= q_max - kmer + 1
- q_oligo = q_seq[q_pos ... q_pos + kmer]
-
- s_index[q_oligo].each do |s_pos|
- match = Match.new(q_pos, s_pos, kmer)
-
- unless match_redundant?(redundant, match)
- match_expand(match, q_seq, s_seq, q_min, s_min, q_max, s_max)
- matches << match
-
- match_redundant_add(redundant, match)
- end
- end
-
- q_pos += 1
- end
-
- matches
- end
-
- # Method that indexes a sequence within a given interval such that the
- # index contains all oligos of a given kmer size and the positions where
- # this oligo was located.
- def index_seq(seq, min, max, kmer)
- index_hash = Hash.new { |h, k| h[k] = [] }
-
- pos = min
-
- while pos <= max - kmer + 1
- oligo = seq[pos ... pos + kmer]
- index_hash[oligo] << pos
-
- pos += 1
- end
-
- index_hash
- end
-
- # Method to check if a match is redundant.
- def match_redundant?(redundant, match)
- redundant[match.q_beg].each do |s_interval|
- if s_interval.include? match.s_beg and s_interval.include? match.s_end
- return true
- end
- end
-
- false
- end
-
- # Method that adds a match to the redundancy index.
- def match_redundant_add(redundant, match)
- (match.q_beg .. match.q_end).each do |q|
- redundant[q] << (match.s_beg .. match.s_end)
- end
- end
-
- # Method that expands a match as far as possible to the left and right.
- def match_expand(match, q_seq, s_seq, q_min, s_min, q_max, s_max)
- match_expand_left(match, q_seq, s_seq, q_min, s_min)
- match_expand_right(match, q_seq, s_seq, q_max, s_max)
-
- match
- end
-
- # Method that expands a match as far as possible to the left.
- def match_expand_left(match, q_seq, s_seq, q_min, s_min)
- while match.q_beg > q_min and
- match.s_beg > s_min and
- q_seq[match.q_beg - 1] == s_seq[match.s_beg - 1]
- match.q_beg -= 1
- match.s_beg -= 1
- match.length += 1
- end
-
- match
- end
-
- # Method that expands a match as far as possible to the right.
- def match_expand_right(match, q_seq, s_seq, q_max, s_max)
- while match.q_end < q_max and
- match.s_end < s_max and
- q_seq[match.q_end + 1] == s_seq[match.s_end + 1]
- match.length += 1
- end
-
- match
- end
-
# Method for debugging purposes that upcases matching sequence while
# non-matching sequence is kept in lower case.
def matches_upcase
end
def empty? # Reports whether the space is empty, judged from the differences q_max - q_min and s_max - s_min.
- if @q_max - @q_min > 0 and @s_max - @s_min > 0
+ if @q_max - @q_min >= 0 and @s_max - @s_min >= 0 # a zero difference (max == min) now counts as non-empty
return false
end
true # reached when either difference is negative
end
end
-
- # Class for containing a match between two sequences q and s.
- class Match
- attr_accessor :q_beg, :s_beg, :length, :score
-
- def initialize(q_beg, s_beg, length, score = 0.0)
- @q_beg = q_beg
- @s_beg = s_beg
- @length = length
- @score = score
- end
-
- def q_end
- @q_beg + @length - 1
- end
-
- def s_end
- @s_beg + @length - 1
- end
-
- def to_s(seq = nil)
- s = "q: #{@q_beg} #{q_end} s: #{@s_beg} #{s_end} l: #{@length} s: #{@score}"
- s << " seq: #{seq[@q_beg .. q_end]}" if seq
- s
- end
- end
end