require 'open3'
require 'narray'
require 'maasha/fasta'
+require 'maasha/mum'
class AlignError < StandardError; end;
q_entry.seq.downcase!
s_entry.seq.downcase!
- ap = AlignPair.new(q_entry.seq, s_entry.seq)
+ ap = AlignPair.new(q_entry, s_entry)
ap.align(q_space_beg, q_space_end, s_space_beg, s_space_end, 15, [])
class AlignPair
attr_reader :matches
- # Method to initialize an AlignPair object given two sequence strings.
- def initialize(q_seq, s_seq)
- @q_seq = q_seq
- @s_seq = s_seq
+ # Method to initialize an AlignPair object given two Seq objects.
+ def initialize(q_entry, s_entry)
+ @q_entry = q_entry
+ @s_entry = s_entry
@matches = []
end
end
while new_matches.empty? and kmer_size > 0
- if @q_seq.length >= kmer_size and @s_seq.length >= kmer_size
- q_index = index_seq(q_space_beg, q_space_end, @q_seq, kmer_size, kmer_size)
- s_index = index_seq(s_space_beg, s_space_end, @s_seq, kmer_size, 1)
- new_matches = find_matches(q_index, s_index, kmer_size)
+ if @q_entry.length >= kmer_size and @s_entry.length >= kmer_size
+ new_matches = MUMs.find(@q_entry, @s_entry, {"q_space_beg" => q_space_beg,
+ "q_space_end" => q_space_end,
+ "s_space_beg" => s_space_beg,
+ "s_space_end" => s_space_end,
+ "kmer_size" => kmer_size} )
end
unless new_matches.empty?
longest_match = new_matches.sort_by { |match| match.length }.pop
- longest_match.expand(@q_seq, @s_seq, q_space_beg, q_space_end, s_space_beg, s_space_end)
# pp longest_match
@matches << longest_match
q_space_beg_right = longest_match.q_end + 1
s_space_beg_right = longest_match.s_end + 1
- q_space_end_right = (q_space_end == @q_seq.length) ? @q_seq.length : q_space_end - 1
- s_space_end_right = (s_space_end == @s_seq.length) ? @s_seq.length : s_space_end - 1
+ q_space_end_right = (q_space_end == @q_entry.length) ? @q_entry.length : q_space_end - 1
+ s_space_end_right = (s_space_end == @s_entry.length) ? @s_entry.length : s_space_end - 1
if q_space_end_left - q_space_beg_left > 0 and s_space_end_left - s_space_beg_left > 0
align(q_space_beg_left, q_space_end_left, s_space_beg_left, s_space_end_left, kmer_size, new_matches)
# sequence is kept in lower case.
def upcase_match
@matches.each do |match|
- @q_seq[match.q_beg .. match.q_end] = @q_seq[match.q_beg .. match.q_end].upcase
- @s_seq[match.s_beg .. match.s_end] = @s_seq[match.s_beg .. match.s_end].upcase
+ @q_entry.seq[match.q_beg .. match.q_end] = @q_entry.seq[match.q_beg .. match.q_end].upcase
+ @s_entry.seq[match.s_beg .. match.s_end] = @s_entry.seq[match.s_beg .. match.s_end].upcase
end
end
#pp "q_gaps #{q_gaps} s_gaps #{s_gaps} diff #{diff}"
if diff < 0
- @q_seq.insert(match.q_beg + q_gaps, "-" * diff.abs)
+ @q_entry.seq.insert(match.q_beg + q_gaps, "-" * diff.abs)
q_gaps += diff.abs
elsif diff > 0
- @s_seq.insert(match.s_beg + s_gaps, "-" * diff.abs)
+ @s_entry.seq.insert(match.s_beg + s_gaps, "-" * diff.abs)
s_gaps += diff.abs
end
end
- diff = @q_seq.length - @s_seq.length
+ diff = @q_entry.length - @s_entry.length
if diff < 0
- @q_seq << ("-" * diff.abs)
+ @q_entry.seq << ("-" * diff.abs)
else
- @s_seq << ("-" * diff.abs)
+ @s_entry.seq << ("-" * diff.abs)
end
end
puts "---"
}
end
-
- private
-
- # Method for indexing a sequence given a search space, oligo size
- # and step size. All oligo positions are saved in a lookup hash
- # where the oligo is the key and the position is the value. Non-
- # unique oligos are removed and the index returned.
- def index_seq(space_beg, space_end, seq, kmer_size, step_size)
- index = Hash.new
- index_count = Hash.new(0)
-
- i = space_beg
-
- while i < space_end - kmer_size + 1
- oligo = seq[i ... i + kmer_size]
-
- index[oligo] = i
- index_count[oligo] += 1
- i += step_size
- end
-
- index_count.each do |oligo, count|
- index.delete(oligo) if count > 1
- end
-
- index
- end
-
- # Method for locating matches between two indexes where one is created
- # using non-overlapping unique oligos and their positions and one is
- # using overlapping unique oligos and positions. Thus iterating over the
- # non-overlapping oligos and checking last match, this can be extended.
- def find_matches(q_index, s_index, kmer_size)
- matches = []
-
- q_index.each do |oligo, q_pos|
- if s_pos = s_index[oligo]
-
- last_match = matches.last
- new_match = Match.new(q_pos, s_pos, kmer_size)
-
- if last_match and
- last_match.q_beg + last_match.length == new_match.q_beg and
- last_match.s_beg + last_match.length == new_match.s_beg
-
- last_match.length += kmer_size
- else
- matches << new_match
- end
- end
- end
-
- matches
- end
-
- # Class for Match objects.
- class Match
- attr_accessor :length
- attr_reader :q_beg, :s_beg
-
- def initialize(q_beg, s_beg, length)
- @q_beg = q_beg
- @s_beg = s_beg
- @length = length
- end
-
- def q_end
- @q_beg + @length - 1
- end
-
- def s_end
- @s_beg + @length - 1
- end
-
- def to_s
- "q_beg=#{q_beg} q_end=#{q_end} s_beg=#{s_beg} s_end=#{s_end} length=#{length}"
- end
-
- # Method to expand a match as far as possible to the left and right
- # within a given search space.
- def expand(q_seq, s_seq, q_space_beg, q_space_end, s_space_beg, s_space_end)
- expand_left(q_seq, s_seq, q_space_beg, s_space_beg)
- expand_right(q_seq, s_seq, q_space_end, s_space_end)
- end
-
- private
-
- # Method to expand a match as far as possible to the left within a given
- # search space.
- def expand_left(q_seq, s_seq, q_space_beg, s_space_beg)
- while @q_beg > q_space_beg and @s_beg > s_space_beg and q_seq[@q_beg - 1] == s_seq[@s_beg - 1]
- @q_beg -= 1
- @s_beg -= 1
- @length += 1
- end
- end
-
- # Method to expand a match as far as possible to the right within a given
- # search space.
- def expand_right(q_seq, s_seq, q_space_end, s_space_end)
- while q_end + 1 <= q_space_end and s_end + 1 <= s_space_end and q_seq[q_end + 1] == s_seq[s_end + 1]
- @length += 1
- end
- end
- end
end
class Consensus