git.donarmstrong.com Git - biopieces.git/commitdiff
refactored patternmatcher
author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 18 May 2011 14:52:28 +0000 (14:52 +0000)
committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 18 May 2011 14:52:28 +0000 (14:52 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1408 74ccb610-7750-0410-82ae-013aeee3265d

code_ruby/lib/maasha/patternmatcher.rb

index 89ffb70c445d1df7beea29d01509484e2468ae26..a07fd3a80c4c8234d539390edaddd4961a9762ec 100644 (file)
@@ -81,18 +81,17 @@ module PatternMatcher
   # is allowed to contain a given maximum edit distance. If a match is located a 
   # Match object will be returned otherwise nil.
   def match(pattern, pos = 0, max_edit_distance = 0)
-    @pattern           = pattern
-    @pos               = pos
-    @max_edit_distance = max_edit_distance
-    @vector            = vector_init
+    vector = Vector.new(@seq, pattern, max_edit_distance)
 
-    while @pos < @seq.length
-      vector_update
+    while pos < @seq.length
+      vector.update(pos)
 
-      return match_new if match_found?
+      return vector.to_match(pos) if vector.match_found?
 
-      @pos += 1
+      pos += 1
     end
+
+    nil   # no match
   end
 
   # ------------------------------------------------------------------------------
@@ -123,28 +122,30 @@ module PatternMatcher
 
     return matches unless block_given?
   end
+end
 
-  private
-
-  # Method to initailize the score vector and return this.
-  def vector_init
-    vector = []
+# Class containing the score vector used for locating matches.
+class Vector
+  # Method to initialize the score vector.
+  def initialize(seq, pattern, max_edit_distance)
+    @seq               = seq
+    @pattern           = pattern
+    @max_edit_distance = max_edit_distance
+    @vector            = []
 
     (0 ... @pattern.length + 1).each do |i|
-      vector[i] = Score.new(matches = 0, mismatches = 0, insertions = i)
+      @vector[i] = Score.new(matches = 0, mismatches = 0, insertions = i, deletions = 0, edit_distance = i)
     end
-
-    vector
   end
 
   # Method to update the score vector.
-  def vector_update
+  def update(pos)
     score_diag = @vector[0]
     score_up   = Score.new  # insertion
     score_left = @vector[1] # deletion
 
     (0 ... @pattern.length).each do |i|
-      if match?(@seq[@pos], @pattern[i])
+      if match?(@seq[pos], @pattern[i])
         new_score = score_diag.dup
         new_score.matches += 1
       else
@@ -158,6 +159,8 @@ module PatternMatcher
           new_score = score_up.dup
           new_score.insertions += 1
         end
+
+        new_score.edit_distance += 1
       end
 
       score_diag = @vector[i + 1]
@@ -168,6 +171,34 @@ module PatternMatcher
     end
   end
 
+  # Method that determines if a match was found by analyzing the score vector.
+  def match_found?
+    if @vector.last.edit_distance <= @max_edit_distance
+      true
+    end
+  end
+
+  # Method that returns a Match object initialized with
+  # information from the score vector.
+  def to_match(pos)
+    matches    = @vector.last.matches
+    mismatches = @vector.last.mismatches
+    insertions = @vector.last.insertions
+    deletions  = @vector.last.deletions
+    length     = @pattern.length - insertions + deletions
+    offset     = pos - length + 1
+    match      = @seq[offset ... offset + length]
+
+    Match.new(offset, match, matches, mismatches, insertions, deletions, length)
+  end
+
+  # Method to convert the score vector to a string.
+  def to_s
+    "(m,m,i,d,e)\n" + @vector.join("\n") + "\n\n"
+  end
+
+  private
+
   # Method to determine if a match occurred.
   def match?(char1, char2)
     (EQUAL[char1.ord] & EQUAL[char2.ord]) != 0
@@ -176,7 +207,7 @@ module PatternMatcher
   # Method to determine if a mismatch occured.
   def mismatch?(score_diag, score_up, score_left)
     if score_diag.edit_distance <= score_up.edit_distance and
-       score_diag.edit_distance <= score_left.edit_distance
+      score_diag.edit_distance <= score_left.edit_distance
       true
     end
   end
@@ -184,7 +215,7 @@ module PatternMatcher
   # Method to determine if an insertion occured.
   def insertion?(score_diag, score_up, score_left)
     if score_up.edit_distance <= score_diag.edit_distance and
-       score_up.edit_distance <= score_left.edit_distance
+      score_up.edit_distance <= score_left.edit_distance
       true
     end
   end
@@ -192,78 +223,42 @@ module PatternMatcher
   # Method to determine if a deletion occured.
   def deletion?(score_diag, score_up, score_left)
     if score_left.edit_distance <= score_diag.edit_distance and
-       score_left.edit_distance <= score_up.edit_distance
+      score_left.edit_distance <= score_up.edit_distance
       true
     end
   end
+end
 
-  # Method to print the score vector.
-  def vector_print
-    @vector.each do |s|
-      puts s
-    end
+# Class to instantiate Score objects that hold score information.
+class Score
+  attr_accessor :matches, :mismatches, :insertions, :deletions, :edit_distance
 
-    puts
+  def initialize(matches = 0, mismatches = 0, insertions = 0, deletions = 0, edit_distance = 0)
+    @matches       = matches
+    @mismatches    = mismatches
+    @insertions    = insertions
+    @deletions     = deletions
+    @edit_distance = edit_distance
   end
 
-  # Method that returns a Match object initialized with
-  # information from the score vector.
-  def match_new
-    matches    = @vector.last.matches
-    mismatches = @vector.last.mismatches
-    insertions = @vector.last.insertions
-    deletions  = @vector.last.deletions
-    length     = @pattern.length - insertions + deletions
-    pos        = @pos - length + 1
-    match      = @seq[pos ... pos + length]
-
-    Match.new(pos, match, matches, mismatches, insertions, deletions, length)
-  end
-
-  # Method that determines if a match was found by analyzing the score vector.
-  def match_found?
-    if @vector.last.edit_distance <= @max_edit_distance
-      true
-    end
-  end
-
-  # Class to instantiate Score objects that holds score information.
-  class Score
-    attr_accessor :matches, :mismatches, :insertions, :deletions
-
-    def initialize(matches = 0, mismatches = 0, insertions = 0, deletions = 0)
-      @matches    = matches
-      @mismatches = mismatches
-      @insertions = insertions
-      @deletions  = deletions
-    end
-
-    # Method to calculate and return the edit distance.
-    def edit_distance
-      self.mismatches + self.insertions + self.deletions
-    end
-
-    private    
-
-    def to_s
-      "(#{[self.matches, self.mismatches, self.insertions, self.deletions].join(',')})"
-    end
+  def to_s
+    "(#{[self.matches, self.mismatches, self.insertions, self.deletions, self.edit_distance].join(',')})"
   end
+end
 
-  # Class for creating Match objects which contain the description of a
-  # match between a nucleotide sequence and a pattern.
-  class Match
-    attr_reader :pos, :match, :matches, :mismatches, :insertions, :deletions, :edit_distance, :length
-
-    def initialize(pos, match, matches, mismatches, insertions, deletions, length)
-      @pos           = pos
-      @match         = match
-      @matches       = matches
-      @mismatches    = mismatches
-      @insertions    = insertions
-      @deletions     = deletions
-      @edit_distance = mismatches + insertions + deletions
-      @length        = length
-    end
+# Class for creating Match objects which contain the description of a
+# match between a nucleotide sequence and a pattern.
+class Match
+  attr_reader :pos, :match, :matches, :mismatches, :insertions, :deletions, :edit_distance, :length
+
+  def initialize(pos, match, matches, mismatches, insertions, deletions, length)
+    @pos           = pos
+    @match         = match
+    @matches       = matches
+    @mismatches    = mismatches
+    @insertions    = insertions
+    @deletions     = deletions
+    @edit_distance = mismatches + insertions + deletions
+    @length        = length
   end
 end