From: martinahansen Date: Wed, 18 May 2011 14:52:28 +0000 (+0000) Subject: refactored patternmatcher X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=4a6a5dcc7de049a3a28a40531424eaeab49c5f4f;p=biopieces.git refactored patternmatcher git-svn-id: http://biopieces.googlecode.com/svn/trunk@1408 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/code_ruby/lib/maasha/patternmatcher.rb b/code_ruby/lib/maasha/patternmatcher.rb index 89ffb70..a07fd3a 100644 --- a/code_ruby/lib/maasha/patternmatcher.rb +++ b/code_ruby/lib/maasha/patternmatcher.rb @@ -81,18 +81,17 @@ module PatternMatcher # is allowed to contain a given maximum edit distance. If a match is located a # Match object will be returned otherwise nil. def match(pattern, pos = 0, max_edit_distance = 0) - @pattern = pattern - @pos = pos - @max_edit_distance = max_edit_distance - @vector = vector_init + vector = Vector.new(@seq, pattern, max_edit_distance) - while @pos < @seq.length - vector_update + while pos < @seq.length + vector.update(pos) - return match_new if match_found? + return vector.to_match(pos) if vector.match_found? - @pos += 1 + pos += 1 end + + nil # no match end # ------------------------------------------------------------------------------ @@ -123,28 +122,30 @@ module PatternMatcher return matches unless block_given? end +end - private - - # Method to initailize the score vector and return this. - def vector_init - vector = [] +# Class containing the score vector used for locating matches. +class Vector + # Method to initailize the score vector. + def initialize(seq, pattern, max_edit_distance) + @seq = seq + @pattern = pattern + @max_edit_distance = max_edit_distance + @vector = [] (0 ... @pattern.length + 1).each do |i| - vector[i] = Score.new(matches = 0, mismatches = 0, insertions = i) + @vector[i] = Score.new(matches = 0, mismatches = 0, insertions = i, deletions = 0, edit_distance = i) end - - vector end # Method to update the score vector. - def vector_update + def update(pos) score_diag = @vector[0] score_up = Score.new # insertion score_left = @vector[1] # deletion (0 ... @pattern.length).each do |i| - if match?(@seq[@pos], @pattern[i]) + if match?(@seq[pos], @pattern[i]) new_score = score_diag.dup new_score.matches += 1 else @@ -158,6 +159,8 @@ module PatternMatcher new_score = score_up.dup new_score.insertions += 1 end + + new_score.edit_distance += 1 end score_diag = @vector[i + 1] @@ -168,6 +171,34 @@ module PatternMatcher end end + # Method that determines if a match was found by analyzing the score vector. + def match_found? + if @vector.last.edit_distance <= @max_edit_distance + true + end + end + + # Method that returns a Match object initialized with + # information from the score vector. + def to_match(pos) + matches = @vector.last.matches + mismatches = @vector.last.mismatches + insertions = @vector.last.insertions + deletions = @vector.last.deletions + length = @pattern.length - insertions + deletions + offset = pos - length + 1 + match = @seq[offset ... offset + length] + + Match.new(offset, match, matches, mismatches, insertions, deletions, length) + end + + # Method to convert the score vector to a string. + def to_s + "(m,m,i,d,e)\n" + @vector.join("\n") + "\n\n" + end + + private + # Method to determine if a match occurred. def match?(char1, char2) (EQUAL[char1.ord] & EQUAL[char2.ord]) != 0 @@ -176,7 +207,7 @@ module PatternMatcher # Method to determine if a mismatch occured. def mismatch?(score_diag, score_up, score_left) if score_diag.edit_distance <= score_up.edit_distance and - score_diag.edit_distance <= score_left.edit_distance + score_diag.edit_distance <= score_left.edit_distance true end end @@ -184,7 +215,7 @@ module PatternMatcher # Method to determine if an insertion occured. def insertion?(score_diag, score_up, score_left) if score_up.edit_distance <= score_diag.edit_distance and - score_up.edit_distance <= score_left.edit_distance + score_up.edit_distance <= score_left.edit_distance true end end @@ -192,78 +223,42 @@ module PatternMatcher # Method to determine if a deletion occured. def deletion?(score_diag, score_up, score_left) if score_left.edit_distance <= score_diag.edit_distance and - score_left.edit_distance <= score_up.edit_distance + score_left.edit_distance <= score_up.edit_distance true end end +end - # Method to print the score vector. - def vector_print - @vector.each do |s| - puts s - end +# Class to instantiate Score objects that holds score information. +class Score + attr_accessor :matches, :mismatches, :insertions, :deletions, :edit_distance - puts + def initialize(matches = 0, mismatches = 0, insertions = 0, deletions = 0, edit_distance = 0) + @matches = matches + @mismatches = mismatches + @insertions = insertions + @deletions = deletions + @edit_distance = edit_distance end - # Method that returns a Match object initialized with - # information from the score vector. - def match_new - matches = @vector.last.matches - mismatches = @vector.last.mismatches - insertions = @vector.last.insertions - deletions = @vector.last.deletions - length = @pattern.length - insertions + deletions - pos = @pos - length + 1 - match = @seq[pos ... pos + length] - - Match.new(pos, match, matches, mismatches, insertions, deletions, length) - end - - # Method that determines if a match was found by analyzing the score vector. - def match_found? - if @vector.last.edit_distance <= @max_edit_distance - true - end - end - - # Class to instantiate Score objects that holds score information. - class Score - attr_accessor :matches, :mismatches, :insertions, :deletions - - def initialize(matches = 0, mismatches = 0, insertions = 0, deletions = 0) - @matches = matches - @mismatches = mismatches - @insertions = insertions - @deletions = deletions - end - - # Method to calculate and return the edit distance. - def edit_distance - self.mismatches + self.insertions + self.deletions - end - - private - - def to_s - "(#{[self.matches, self.mismatches, self.insertions, self.deletions].join(',')})" - end + def to_s + "(#{[self.matches, self.mismatches, self.insertions, self.deletions, self.edit_distance].join(',')})" end +end - # Class for creating Match objects which contain the description of a - # match between a nucleotide sequence and a pattern. - class Match - attr_reader :pos, :match, :matches, :mismatches, :insertions, :deletions, :edit_distance, :length - - def initialize(pos, match, matches, mismatches, insertions, deletions, length) - @pos = pos - @match = match - @matches = matches - @mismatches = mismatches - @insertions = insertions - @deletions = deletions - @edit_distance = mismatches + insertions + deletions - @length = length - end +# Class for creating Match objects which contain the description of a +# match between a nucleotide sequence and a pattern. +class Match + attr_reader :pos, :match, :matches, :mismatches, :insertions, :deletions, :edit_distance, :length + + def initialize(pos, match, matches, mismatches, insertions, deletions, length) + @pos = pos + @match = match + @matches = matches + @mismatches = mismatches + @insertions = insertions + @deletions = deletions + @edit_distance = mismatches + insertions + deletions + @length = length end end