# is allowed to contain a given maximum edit distance. If a match is located, a
# Match object is returned; otherwise nil is returned.
def match(pattern, pos = 0, max_edit_distance = 0)
  # pattern           - pattern to locate in @seq.
  # pos               - Integer index in @seq where scanning starts (default 0).
  # max_edit_distance - highest edit distance a hit may have (default 0).
  #
  # Returns the result of Vector#to_match for the first position at which the
  # score vector reports a match, or nil when the end of @seq is reached.
  vector = Vector.new(@seq, pattern, max_edit_distance)

  # A single score vector is advanced one position at a time. When pos is at
  # or beyond the end of @seq the range is empty and nil is returned.
  (pos...@seq.length).each do |i|
    vector.update(i)
    return vector.to_match(i) if vector.match_found?
  end

  nil # no match
end
# ------------------------------------------------------------------------------
return matches unless block_given?
end
+end
- private
-
- # Method to initailize the score vector and return this.
- def vector_init
- vector = []
+# Class containing the score vector used for locating matches.
+class Vector
# Method to initialize the score vector.
#
# seq               - sequence that will be scanned.
# pattern           - pattern to look for in seq.
# max_edit_distance - highest edit distance accepted by match_found?.
#
# The vector receives pattern.length + 1 Score objects; the Score at index i
# starts out with i insertions and an edit distance of i.
def initialize(seq, pattern, max_edit_distance)
  @seq = seq
  @pattern = pattern
  @max_edit_distance = max_edit_distance
  # Plain positional arguments: Score.new takes matches, mismatches,
  # insertions, deletions and edit_distance, in that order.
  @vector = (0..@pattern.length).map { |i| Score.new(0, 0, i, 0, i) }
end
# Method to update the score vector.
- def vector_update
+ def update(pos)
score_diag = @vector[0]
score_up = Score.new # insertion
score_left = @vector[1] # deletion
(0 ... @pattern.length).each do |i|
- if match?(@seq[@pos], @pattern[i])
+ if match?(@seq[pos], @pattern[i])
new_score = score_diag.dup
new_score.matches += 1
else
new_score = score_up.dup
new_score.insertions += 1
end
+
+ new_score.edit_distance += 1
end
score_diag = @vector[i + 1]
end
end
# Method that determines if a match was found by analyzing the score vector.
#
# Returns true when the edit distance of the last Score in the vector is at
# most @max_edit_distance, and false otherwise.
def match_found?
  @vector.last.edit_distance <= @max_edit_distance
end
+
# Method that returns a Match object initialized with
# information from the score vector.
#
# pos - index in @seq of the last character covered by the match.
#
# The counts are read from the last Score in the vector. The match length is
# the pattern length minus insertions plus deletions, and the match starts
# at pos - length + 1.
def to_match(pos)
  score = @vector.last
  length = @pattern.length - score.insertions + score.deletions
  offset = pos - length + 1
  match = @seq[offset...offset + length]

  Match.new(offset, match, score.matches, score.mismatches, score.insertions, score.deletions, length)
end
+
+ # Method to convert the score vector to a string.
+ def to_s
+ "(m,m,i,d,e)\n" + @vector.join("\n") + "\n\n"
+ end
+
+ private
+
# Method to determine if a match occurred.
def match?(char1, char2)
(EQUAL[char1.ord] & EQUAL[char2.ord]) != 0
# Method to determine if a mismatch occurred.
#
# Returns true when the diagonal score's edit distance is no greater than
# both the up and the left score's edit distance, and false otherwise.
def mismatch?(score_diag, score_up, score_left)
  score_diag.edit_distance <= score_up.edit_distance &&
    score_diag.edit_distance <= score_left.edit_distance
end
# Method to determine if an insertion occurred.
#
# Returns true when the up score's edit distance is no greater than both
# the diagonal and the left score's edit distance, and false otherwise.
def insertion?(score_diag, score_up, score_left)
  score_up.edit_distance <= score_diag.edit_distance &&
    score_up.edit_distance <= score_left.edit_distance
end
# Method to determine if a deletion occurred.
#
# Returns true when the left score's edit distance is no greater than both
# the diagonal and the up score's edit distance, and false otherwise.
def deletion?(score_diag, score_up, score_left)
  score_left.edit_distance <= score_diag.edit_distance &&
    score_left.edit_distance <= score_up.edit_distance
end
+end
# Class to instantiate Score objects that hold score information.
#
# A Score is a mutable record of five counters: matches, mismatches,
# insertions, deletions and edit_distance. Every counter defaults to 0, and
# edit_distance is stored as given rather than derived from the others.
class Score
  attr_accessor :matches, :mismatches, :insertions, :deletions, :edit_distance

  def initialize(matches = 0, mismatches = 0, insertions = 0, deletions = 0, edit_distance = 0)
    @matches = matches
    @mismatches = mismatches
    @insertions = insertions
    @deletions = deletions
    @edit_distance = edit_distance
  end

  # Returns the counters as "(matches,mismatches,insertions,deletions,edit_distance)".
  def to_s
    "(#{[matches, mismatches, insertions, deletions, edit_distance].join(',')})"
  end
end
# Class for creating Match objects which contain the description of a
# match between a nucleotide sequence and a pattern.
#
# All attributes are read-only. edit_distance is not passed in; it is
# computed as mismatches + insertions + deletions.
class Match
  attr_reader :pos, :match, :matches, :mismatches, :insertions, :deletions, :edit_distance, :length

  def initialize(pos, match, matches, mismatches, insertions, deletions, length)
    @pos = pos
    @match = match
    @matches = matches
    @mismatches = mismatches
    @insertions = insertions
    @deletions = deletions
    @edit_distance = mismatches + insertions + deletions
    @length = length
  end
end