]> git.donarmstrong.com Git - biopieces.git/commitdiff
updated patternmatcher code
authormartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Thu, 14 Apr 2011 14:27:47 +0000 (14:27 +0000)
committermartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Thu, 14 Apr 2011 14:27:47 +0000 (14:27 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1327 74ccb610-7750-0410-82ae-013aeee3265d

code_ruby/Maasha/lib/patternmatcher.rb
code_ruby/Maasha/lib/seq.rb
code_ruby/Maasha/test/test_patternmatcher.rb
code_ruby/Maasha/test/test_seq.rb

index b4d92b840c1e769867e2ac43cb2b8b5082f20eec..478ffba2e23533601bc53a17e22c532df34117f3 100644 (file)
@@ -46,75 +46,54 @@ EQUAL = {
 }
 
 # Module containing code to locate nucleotide patterns in sequences allowing for
-# ambiguity codes and a given maximum number of mismatches, insertions, and deletions.
+# ambiguity codes and a given maximum edit distance.
 # Insertions are nucleotides found in the pattern but not in the sequence.
 # Deletions are nucleotides found in the sequence but not in the pattern.
+#
+# Inspired by the paper by Bruno Woltzenlogel Paleo (page 197):
+# http://www.logic.at/people/bruno/Papers/2007-GATE-ESSLLI.pdf
 module PatternMatcher
   # ------------------------------------------------------------------------------
-  #   str.match(pattern[, pos[, max_mismatches[, max_insertions[, max_deletions]]]])
+  #   str.match(pattern[, pos[, max_edit_distance]])
   #   -> Match or nil
   #
   # ------------------------------------------------------------------------------
-  # Method to locate the next pattern match starting from a given position.
-  # A match is located by exploring all possible paths allowing for a given
-  # maximum number of mismatches, insertions and deletions. If a match is
-  # located a Match object will be returned. If all paths are exhausted and
-  # no match is located the position is incremented. If no match is located
-  # whatsoever, then nil is returned.
-  # TODO: converging paths should be skipped for speed-up.
-  def match(pattern, pos = 0, max_mismatches = 0, max_insertions = 0, max_deletions = 0)
-    @pattern        = pattern
-    @max_mismatches = max_mismatches
-    @max_insertions = max_insertions
-    @max_deletions  = max_deletions
-
-    while pos <= @seq.length - @pattern.length + @max_insertions
-      paths = []
-      paths << Path.new(pos, seq_index = pos, pattern_index = 0)
-
-      while not paths.empty?
-        new_paths = []
-
-        paths.each do |path|
-          next if path.exhausted?(@seq, @pattern)
-          return path.to_match if match_found?(path)
-
-          if path.match?(@seq, @pattern)
-            new_paths << path.match
-          elsif path.mismatches < max_mismatches
-            new_paths << path.mismatch
-          end
-
-          new_paths << path.insertion if path.insertions < max_insertions
-          new_paths << path.deletion  if path.deletions  < max_deletions
-        end
+  # Method to locate the next pattern match starting from a given position. A match
+  # may differ from the pattern by at most the given maximum edit distance. If a
+  # match is located a Match object is returned, otherwise nil.
+  def match(pattern, pos = 0, max_edit_distance = 0)
+    @pattern           = pattern
+    @pos               = pos
+    @max_edit_distance = max_edit_distance
+    @vector            = vector_init
 
-        paths = new_paths
-      end
+    while @pos < @seq.length
+      vector_update
+
+      return match_new if match_found?
 
-      pos += 1
+      @pos += 1
     end
   end
 
   # ------------------------------------------------------------------------------
-  #   str.scan(pattern[, pos[, max_mismatches[, max_insertions[, max_deletions]]]])
+  #   str.scan(pattern[, pos[, max_edit_distance]])
   #   -> Array
-  #   str.scan(pattern[, pos[, max_mismatches[, max_insertions[, max_deletions]]]]) { |match|
+  #   str.scan(pattern[, pos[, max_edit_distance]]) { |match|
   #     block
   #   }
   #   -> Match
   #
   # ------------------------------------------------------------------------------
   # Method to iterate through a sequence to locate pattern matches starting
-  # from a given position. A match is located by exploring all possible paths
-  # allowing for a given maximum number of mismatches, insertions and deletions.
+  # from a given position and allowing for a maximum edit distance.
   # Matches found in block context return the Match object. Otherwise matches are
   # returned in an Array.
-  def scan(pattern, pos = 0, max_mismatches = 0, max_insertions = 0, max_deletions = 0)
+  def scan(pattern, pos = 0, max_edit_distance = 0)
     matches = []
     offset  = pos
 
-    while match = match(pattern, offset, max_mismatches, max_insertions, max_deletions)
+    while match = match(pattern, offset, max_edit_distance)
       if block_given?
         yield match
       else
@@ -129,99 +108,139 @@ module PatternMatcher
 
   private
 
-  # Method to check if a path is complete and a match was found.
-  def match_found?(path)
-    if path.mismatches <= @max_mismatches and path.insertions <= @max_insertions and path.deletions <= @max_deletions
-      if path.matches == @pattern.length - path.insertions - path.mismatches
-        return true
-      end
+  # Method to initialize the score vector and return it.
+  def vector_init
+    vector = []
+
+    (0 ... @pattern.length + 1).each do |i|
+      vector[i] = Score.new(matches = 0, mismatches = 0, insertions = i)
     end
+
+    vector
   end
 
-  # Class for describing a path for matching a nucleotide sequence and a pattern.
-  class Path
-    attr_accessor :pos, :seq_index, :pattern_index, :matches, :mismatches, :insertions, :deletions, :length
+  # Method to update the score vector.
+  def vector_update
+    new_vector = @vector.dup
 
-    def initialize(pos, seq_index, pattern_index, matches = 0, mismatches = 0, insertions = 0, deletions = 0, length = 0)
-      @pos           = pos
-      @seq_index     = seq_index
-      @pattern_index = pattern_index
-      @matches       = matches
-      @mismatches    = mismatches
-      @insertions    = insertions
-      @deletions     = deletions
-      @length        = length
+    (0 ... @pattern.length).each do |i|
+      if EQUAL[(@seq[@pos] + @pattern[i]).upcase.to_sym]
+        new_vector[i + 1] = @vector[i].dup
+        new_vector[i + 1].matches += 1
+      else
+        mismatch  = @vector[i].dup
+        insertion = new_vector[i].dup
+        deletion  = @vector[i + 1].dup
+
+        if deletion?(mismatch, insertion, deletion)
+          deletion.deletions += 1
+          new_vector[i + 1] = deletion
+        elsif mismatch?(mismatch, insertion, deletion)
+          mismatch.mismatches += 1
+          new_vector[i + 1] = mismatch
+        elsif insertion?(mismatch, insertion, deletion)
+          insertion.insertions += 1
+          new_vector[i + 1] = insertion
+        else
+          raise "AAAAarrgh"
+        end
+      end
     end
 
-    # Method to check if nucleotides match.
-    def match?(seq, pattern)
-      EQUAL["#{seq[self.seq_index]}#{pattern[self.pattern_index]}".upcase.to_sym]
+    @vector = new_vector
+  end
+
+  # Method to determine if a mismatch occurred.
+  def mismatch?(mismatch, insertion, deletion)
+    if mismatch.edit_distance <= insertion.edit_distance and
+       mismatch.edit_distance <= deletion.edit_distance
+      true
     end
+  end
 
-    # Method to check if the path is exhausted.
-    def exhausted?(seq, pattern)
-      if self.seq_index - self.insertions > seq.length
-        true
-      elsif self.pattern_index > pattern.length
-        true
-      end
+  # Method to determine if an insertion occurred.
+  def insertion?(mismatch, insertion, deletion)
+    if insertion.edit_distance <= mismatch.edit_distance and
+       insertion.edit_distance <= deletion.edit_distance
+      true
+    end
+  end
+
+  # Method to determine if a deletion occurred.
+  def deletion?(mismatch, insertion, deletion)
+    if deletion.edit_distance <= mismatch.edit_distance and
+       deletion.edit_distance <= insertion.edit_distance
+      true
     end
+  end
 
-    # Method that returns a Match object created from a Path object.
-    def to_match
-      Match.new(@pos, @matches, @mismatches, @insertions, @deletions, @length)
+  # Method to print the score vector.
+  def vector_print
+    @vector.each do |s|
+      puts s
     end
 
-    # Method that returns a new Match object for a matching path
-    def match
-      path_match                = self.dup
-      path_match.length        += 1
-      path_match.matches       += 1
-      path_match.seq_index     += 1
-      path_match.pattern_index += 1
-      path_match
+    puts
+  end
+
+  # Method that returns a Match object initialized with
+  # information from the score vector.
+  def match_new
+    matches    = @vector.last.matches
+    mismatches = @vector.last.mismatches
+    insertions = @vector.last.insertions
+    deletions  = @vector.last.deletions
+    length     = @pattern.length - insertions + deletions
+    pos        = @pos - length + 1
+    match      = @seq[pos ... pos + length]
+
+    Match.new(pos, match, matches, mismatches, insertions, deletions, length)
+  end
+
+  # Method that determines if a match was found by analyzing the score vector.
+  def match_found?
+    if @vector.last.edit_distance <= @max_edit_distance
+      true
     end
+  end
 
-    # Method that returns a new Match object for a matching path
-    def mismatch
-      path_mismatch                = self.dup
-      path_mismatch.length        += 1
-      path_mismatch.mismatches    += 1
-      path_mismatch.seq_index     += 1
-      path_mismatch.pattern_index += 1
-      path_mismatch
+  # Class to instantiate Score objects that hold score information.
+  class Score
+    attr_accessor :matches, :mismatches, :insertions, :deletions
+
+    def initialize(matches = 0, mismatches = 0, insertions = 0, deletions = 0)
+      @matches    = matches
+      @mismatches = mismatches
+      @insertions = insertions
+      @deletions  = deletions
     end
 
-    # Method that returns a new Match object for a insertion path
-    def insertion
-      path_insertion                = self.dup
-      path_insertion.insertions    += 1
-      path_insertion.pattern_index += 1
-      path_insertion
+    # Method to calculate and return the edit distance.
+    def edit_distance
+      self.mismatches + self.insertions + self.deletions
     end
 
-    # Method that returns a new Match object for a deletion path
-    def deletion
-      path_deletion            = self.dup
-      path_deletion.length    += 1
-      path_deletion.deletions += 1
-      path_deletion.seq_index += 1
-      path_deletion
+    private    
+
+    def to_s
+      "(#{[self.matches, self.mismatches, self.insertions, self.deletions].join(',')})"
     end
   end
 
   # Class for creating Match objects which contain the description of a
   # match between a nucleotide sequence and a pattern.
   class Match
-    attr_reader :pos, :matches, :mismatches, :insertions, :deletions, :length
+    attr_reader :pos, :match, :matches, :mismatches, :insertions, :deletions, :edit_distance, :length
 
-    def initialize(pos, matches, mismatches, insertions, deletions, length)
-      @pos        = pos
-      @matches    = matches
-      @mismatches = mismatches
-      @insertions = insertions
-      @deletions  = deletions
-      @length     = length
+    def initialize(pos, match, matches, mismatches, insertions, deletions, length)
+      @pos           = pos
+      @match         = match
+      @matches       = matches
+      @mismatches    = mismatches
+      @insertions    = insertions
+      @deletions     = deletions
+      @edit_distance = mismatches + insertions + deletions
+      @length        = length
     end
   end
 end
index efba4ff32d126a81e2dd9f2c6ac1a0a88b58348d..64dc0db65a40f93a8ece570d4ba083e629ec353e 100644 (file)
@@ -311,22 +311,17 @@ class Seq
   end
 
   # Method that finds an adaptor or part thereof in the sequence of a Seq object.
-  # Returns a Match object if the adaptor was found otherwise nil. The mis_percent,
-  # ins_percent, and del_percent indicate the maximum number of mismatches,
-  # insertions, and deletions allowed in all possible overlaps.
-  def adaptor_find(adaptor, mis_percent = 0, ins_percent = 0, del_percent = 0)
-    raise SeqError, "Mismatch percent out of range #{mis_percent}"  unless (0 .. 100).include? mis_percent
-    raise SeqError, "Insertion percent out of range #{ins_percent}" unless (0 .. 100).include? ins_percent
-    raise SeqError, "Deletion percent out of range #{del_percent}"  unless (0 .. 100).include? del_percent
+  # Returns a Match object if the adaptor was found otherwise nil. The ed_percent
+  # indicates the maximum edit distance allowed in all possible overlaps.
+  def adaptor_find(adaptor, ed_percent = 0)
+    raise SeqError, "Edit distance percent out of range #{ed_percent}" unless (0 .. 100).include? ed_percent
 
     pos = 0
 
     while adaptor.length > 0
-      mis_max = (adaptor.length * mis_percent * 0.01).round
-      ins_max = (adaptor.length * ins_percent * 0.01).round
-      del_max = (adaptor.length * del_percent * 0.01).round
+      ed_max = (adaptor.length * ed_percent * 0.01).round
 
-      match = self.match(adaptor, pos, mis_max, ins_max, del_max)
+      match = self.match(adaptor, pos, ed_max)
 
       return match unless match.nil?
 
index 940b67a534607dfbbbfe397c4be51f1fb1c2e4c7..799386223219c033f2916c116db80ec887c499bb 100755 (executable)
@@ -31,157 +31,106 @@ require 'pp'
 
 class TestPatternMatcher < Test::Unit::TestCase
   def setup
-    @entry = Seq.new("test", "atcg")
+    @p = Seq.new("test", "atcg")
   end
 
-  def test_PatternMatcher_match_with_perfect_match_returns_ok
-    assert_equal(4, @entry.match("atcg").matches)
-    assert_equal(2, @entry.match("cg").matches)
+  def test_PatternMatcher_no_match_returns_nil
+    assert_nil(@p.match("gggg"))
   end
 
-  def test_PatternMatcher_match_with_perfect_match_with_ambiguity_returns_ok
-    assert_equal(4, @entry.match("aNcg").matches)
+  def test_PatternMatcher_match_perfect_returns_correctly
+    m = @p.match("atcg")
+    assert_equal(0, m.pos)
+    assert_equal("atcg", m.match)
+    assert_equal(4, m.matches)
+    assert_equal(0, m.mismatches)
+    assert_equal(0, m.insertions)
+    assert_equal(0, m.deletions)
+    assert_equal(4, m.length)
   end
 
-  def test_PatternMatcher_match_with_fail_match_returns_nil
-    assert_nil(@entry.match("gggg"))
+  def test_PatternMatcher_match_perfect_with_ambiguity_codes_returns_correctly
+    m = @p.match("nnnn")
+    assert_equal(0, m.pos)
+    assert_equal("atcg", m.match)
+    assert_equal(4, m.matches)
+    assert_equal(0, m.mismatches)
+    assert_equal(0, m.insertions)
+    assert_equal(0, m.deletions)
+    assert_equal(4, m.length)
   end
 
-  def test_PatternMatcher_match_with_one_mismatch_with_zero_allowed_returns_nil
-    assert_nil(@entry.match("aAcg"))
+  def test_PatternMatcher_match_with_one_mismatch_and_edit_dist_zero_returns_nil
+    assert_nil(@p.match("aCcg"))
   end
 
-  def test_PatternMatcher_match_with_one_mismatch_with_one_allowed_returns_ok
-    assert_equal(1, @entry.match("aGcg", pos = 0, mismatches = 1).mismatches)
+  def test_PatternMatcher_match_with_one_mismatch_and_edit_dist_one_returns_correctly
+    m = @p.match("aCcg", pos = 0, edit_distance = 1)
+    assert_equal(0, m.pos)
+    assert_equal("atcg", m.match)
+    assert_equal(3, m.matches)
+    assert_equal(1, m.mismatches)
+    assert_equal(0, m.insertions)
+    assert_equal(0, m.deletions)
+    assert_equal(4, m.length)
   end
 
-  def test_PatternMatcher_match_with_two_mismatch_with_one_allowed_returns_nil
-    assert_nil(@entry.match("CtcA", pos = 0, mismatches = 1))
+  def test_PatternMatcher_match_with_two_mismatch_and_edit_dist_one_returns_nil
+    assert_nil(@p.match("aGcA", pos = 0, edit_distance = 1))
   end
 
-  def test_PatternMatcher_match_with_two_mismatch_with_two_allowed_returns_ok
-    assert_equal(2, @entry.match("CtcA", pos = 0, mismatches = 2).mismatches)
+  def test_PatternMatcher_match_with_one_insertion_and_edit_dist_zero_returns_nil
+    assert_nil(@p.match("atGcg"))
   end
 
-  def test_PatternMatcher_match_with_one_insertion_with_zero_allowed_returns_nil
-    assert_nil(@entry.match("atTcg", pos = 0, mismatches = 0, insertions = 0))
-    assert_nil(@entry.match("Tatcg", pos = 0, mismatches = 0, insertions = 0))
-    assert_nil(@entry.match("atcgT", pos = 0, mismatches = 0, insertions = 0))
+  def test_PatternMatcher_match_with_one_insertion_and_edit_dist_one_returns_correctly
+    m = @p.match("atGcg", pos = 0, edit_distance = 1)
+    assert_equal(0, m.pos)
+    assert_equal("atcg", m.match)
+    assert_equal(4, m.matches)
+    assert_equal(0, m.mismatches)
+    assert_equal(1, m.insertions)
+    assert_equal(0, m.deletions)
+    assert_equal(4, m.length)
   end
 
-  def test_PatternMatcher_match_with_one_insertion_with_one_allowed_returns_ok
-    assert_equal(1, @entry.match("atTcg", pos = 0, mismatches = 0, insertions = 1).insertions)
+  def test_PatternMatcher_match_with_two_insertions_and_edit_dist_one_returns_nil
+    assert_nil(@p.match("atGcTg", pos = 0, edit_distance = 1))
   end
 
-  def test_PatternMatcher_match_with_two_insertion_with_one_allowed_returns_nil
-    assert_nil(@entry.match("aCCtcg", pos = 0, mismatches = 0, insertions = 1))
-    assert_nil(@entry.match("CCatcg", pos = 0, mismatches = 0, insertions = 1))
-    assert_nil(@entry.match("atcgCC", pos = 0, mismatches = 0, insertions = 1))
-    assert_nil(@entry.match("CatcgC", pos = 0, mismatches = 0, insertions = 1))
+  def test_PatternMatcher_match_with_two_insertions_and_edit_dist_two_returns_correctly
+    m = @p.match("atGcTg", pos = 0, edit_distance = 2)
+    assert_equal(0, m.pos)
+    assert_equal("atcg", m.match)
+    assert_equal(4, m.matches)
+    assert_equal(0, m.mismatches)
+    assert_equal(2, m.insertions)
+    assert_equal(0, m.deletions)
+    assert_equal(4, m.length)
   end
 
-  def test_PatternMatcher_match_with_two_insertion_with_two_allowed_returns_ok
-    assert_equal(2, @entry.match("aCCtcg", pos = 0, mismatches = 0, insertions = 2).insertions)
-    assert_equal(2, @entry.match("CCatcg", pos = 0, mismatches = 0, insertions = 2).insertions)
+  def test_PatternMatcher_match_with_one_deletion_and_edit_distance_zero_returns_nil
+    assert_nil(@p.match("acg"))
   end
 
-  def test_PatternMatcher_match_with_one_deletion_with_zero_allowed_returns_nil
-    assert_nil(@entry.match("acg"))
-    assert_nil(@entry.match("atg"))
+  def test_PatternMatcher_match_with_one_deletion_and_edit_distance_one_returns_correctly
+    m = @p.match("acg", pos = 0, edit_distance = 1)
+    assert_equal(0, m.pos)
+    assert_equal("atcg", m.match)
+    assert_equal(3, m.matches)
+    assert_equal(0, m.mismatches)
+    assert_equal(0, m.insertions)
+    assert_equal(1, m.deletions)
+    assert_equal(4, m.length)
   end
 
-  def test_PatternMatcher_match_with_one_deletion_with_one_allowed_returns_ok
-    assert_equal(1, @entry.match("tcg", pos = 0, mismatchses = 0, insertions = 0, deletions = 1).deletions)
-    assert_equal(1, @entry.match("acg", pos = 0, mismatchses = 0, insertions = 0, deletions = 1).deletions)
-    assert_equal(1, @entry.match("atg", pos = 0, mismatchses = 0, insertions = 0, deletions = 1).deletions)
+  def test_PatternMatcher_scan_locates_three_patterns_ok
+    p = Seq.new("test", "ataacgagctagctagctagctgactac")
+    assert_equal(3, p.scan("tag").count)
   end
 
-  #  atcg
-  # axdd
-  # g   x
-  def test_PatternMatcher_match_with_two_deletion_with_one_allowed_returns_nil
-    assert_nil(@entry.match("ag", pos = 0, mismatchses = 0, insertions = 0, deletions = 1))
-  end
-
-  def test_PatternMatcher_match_with_two_deletion_with_two_allowed_returns_ok
-    assert_equal(2, @entry.match("cg", pos = 0, mismatchses = 0, insertions = 0, deletions = 2).deletions)
-    assert_equal(2, @entry.match("tg", pos = 0, mismatchses = 0, insertions = 0, deletions = 2).deletions)
-    assert_equal(2, @entry.match("ag", pos = 0, mismatchses = 0, insertions = 0, deletions = 2).deletions)
-  end
-
-  def test_PatternMatcher_match_with_one_mismatch_one_insertions_one_deletion_returns_ok
-    assert_equal(1, @entry.match("ggtg", pos = 0, mismatchses = 1, insertions = 1, deletions = 1).mismatches)
-    assert_equal(1, @entry.match("ggtg", pos = 0, mismatchses = 1, insertions = 1, deletions = 1).insertions)
-    assert_equal(1, @entry.match("ggtg", pos = 0, mismatchses = 1, insertions = 1, deletions = 1).deletions)
-  end
-
-  #  atcgagctagctagctagctgactac
-  # ax
-  # t x
-  # g i
-  # g i
-  # c  x
-  # g   x
-  # 
-  # at--cg
-  # ||  ||
-  # atggcg
-  def test_PatternMatcher_match_with_two_insertions_and_two_allowed_returns_ok
-    entry = Seq.new("test", "atcgagctagctagctagctgactac")
-    assert_equal(2, entry.match("atggcg", pos = 0, mismatchses = 0, insertions = 2, deletions = 0).insertions)
-  end
-
-  #  atggcgagctagctagctagctgactac
-  # ax
-  # t xdd
-  # c    x
-  # g     x
-  # 
-  # atggcg
-  # ||  ||
-  # at--cg
-  def test_PatternMatcher_match_with_two_deletions_and_two_allowed_returns_ok
-    entry = Seq.new("test", "atggcgagctagctagctagctgactac")
-    assert_equal(2, entry.match("atcg", pos = 0, mismatchses = 0, insertions = 0, deletions = 2).deletions)
-  end
-
-  #  ataacgagctagctagctagctgactac
-  # ax
-  # gi
-  # t xdd
-  # c    x
-  # g     x
-  # 
-  # a-taacg
-  # | |  ||
-  # agt--cg
-  def test_PatternMatcher_match_with_one_insertions_and_two_deletions_all_allowed_returns_ok
-    entry = Seq.new("test", "ataacgagctagctagctagctgactac")
-    assert_equal(1, entry.match("agtcg", pos = 0, mismatchses = 0, insertions = 1, deletions = 2).insertions)
-    assert_equal(2, entry.match("agtcg", pos = 0, mismatchses = 0, insertions = 1, deletions = 2).deletions)
-  end
-
-  # --atcg
-  #   ||
-  # cgat
-  def test_PatternMatcher_match_overlapping_left_end_returns_ok
-    assert_equal(2, @entry.match("cgat", pos = 0, mismatches = 0, insertions = 2, deletions = 0).insertions)
-  end
-
-  # atcg
-  #   ||
-  # --cgag
-  def test_PatternMatcher_match_overlapping_right_end_returns_ok
-    assert_equal(2, @entry.match("cgag", pos = 0, mismatches = 0, insertions = 2, deletions = 0).insertions)
-  end
-
-  def test_Pattern_Matcher_scan_locates_three_patterns_ok
-    entry = Seq.new("test", "ataacgagctagctagctagctgactac")
-    assert_equal(3, entry.scan("tag").count)
-  end
-
-  def test_Pattern_Matcher_scan_with_pos_locates_two_patterns_ok
-    entry = Seq.new("test", "ataacgagctagctagctagctgactac")
-    assert_equal(2, entry.scan("tag", 10).count)
+  def test_PatternMatcher_scan_with_pos_locates_two_patterns_ok
+    p = Seq.new("test", "ataacgagctagctagctagctgactac")
+    assert_equal(2, p.scan("tag", 10).count)
   end
 end
index 0520d06b1d9cc02112cd2a40e2378c74e4d19402..2a00c9009130555a5c220de20aad733aea710993 100755 (executable)
@@ -332,40 +332,16 @@ class TestSeq < Test::Unit::TestCase
     assert_equal(25.00, @entry.soft_mask)
   end
 
-  def test_Seq_adaptor_find_with_bad_mis_percent_raises
+  def test_Seq_adaptor_find_with_bad_ed_percent_raises
     @entry.seq = "actagctagctacgtacg"
-    assert_raise(SeqError) { @entry.adaptor_find("tacg", mis_percent = -1) }
-    assert_raise(SeqError) { @entry.adaptor_find("tacg", mis_percent = 101) }
+    assert_raise(SeqError) { @entry.adaptor_find("tacg", ed_percent = -1) }
+    assert_raise(SeqError) { @entry.adaptor_find("tacg", ed_percent = 101) }
   end
 
-  def test_Seq_adaptor_find_with_ok_mis_percent_dont_raise
+  def test_Seq_adaptor_find_with_ok_ed_percent_dont_raise
     @entry.seq = "actagctagctacgtacg"
-    assert_nothing_raised { @entry.adaptor_find("tacg", mis_percent = 0) }
-    assert_nothing_raised { @entry.adaptor_find("tacg", mis_percent = 100) }
-  end
-
-  def test_Seq_adaptor_find_with_bad_ins_percent_raises
-    @entry.seq = "actagctagctacgtacg"
-    assert_raise(SeqError) { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = -1) }
-    assert_raise(SeqError) { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 101) }
-  end
-
-  def test_Seq_adaptor_find_with_ok_ins_percent_dont_raise
-    @entry.seq = "actagctagctacgtacg"
-    assert_nothing_raised { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 0) }
-    assert_nothing_raised { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 100) }
-  end
-
-  def test_Seq_adaptor_find_with_bad_del_percent_raises
-    @entry.seq = "actagctagctacgtacg"
-    assert_raise(SeqError) { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 0, del_percent = -1) }
-    assert_raise(SeqError) { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 0, del_percent = 101) }
-  end
-
-  def test_Seq_adaptor_find_with_ok_del_percent_dont_raise
-    @entry.seq = "actagctagctacgtacg"
-    assert_nothing_raised { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 0, del_percent = 0) }
-    assert_nothing_raised { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 0, del_percent = 100) }
+    assert_nothing_raised { @entry.adaptor_find("tacg", ed_percent = 0) }
+    assert_nothing_raised { @entry.adaptor_find("tacg", ed_percent = 100) }
   end
 
   def test_Seq_adaptor_find_with_no_match_returns_nil
@@ -383,21 +359,21 @@ class TestSeq < Test::Unit::TestCase
     assert_equal(19, @entry.adaptor_find("gTTTT").pos)
   end
 
-  def test_Seq_adaptor_with_mis_percent_returns_correct_match
+  def test_Seq_adaptor_with_mis_and_ed_percent_returns_correct_match
     @entry.seq = "actaaggctagctacgtccg"
-    assert_equal(0, @entry.adaptor_find("GGGaag", mis_percent = 50).pos)
-    assert_equal(14, @entry.adaptor_find("cgtcTTTT", mis_percent = 50).pos)
+    assert_equal(0, @entry.adaptor_find("GGGaag", ed_percent = 50).pos)
+    assert_equal(14, @entry.adaptor_find("cgtcTTTT", ed_percent = 50).pos)
   end
 
-  def test_Seq_adaptor_with_ins_percent_returns_correct_match
+  def test_Seq_adaptor_with_ins_and_ed_percent_returns_correct_match
     @entry.seq = "actaaggctagctacgtccg"
-    assert_equal(0, @entry.adaptor_find("actGGGaag", mis_percent = 0, ins_percent = 50).pos)
-    assert_equal(15, @entry.adaptor_find("gtAccgTTTTT", mis_percent = 0, ins_percent = 10).pos)
+    assert_equal(0, @entry.adaptor_find("actGGGaag", ed_percent = 50).pos)
+    assert_equal(15, @entry.adaptor_find("gtAccgTTTTT", ed_percent = 10).pos)
   end
 
-  def test_Seq_adaptor_with_del_percent_returns_correct_match
+  def test_Seq_adaptor_with_del_and_ed_percent_returns_correct_match
     @entry.seq = "actaaggctagctacgtccg"
-    assert_equal(0, @entry.adaptor_find("actctag", mis_percent = 0, ins_percent = 0, del_percent = 50).pos)
+    assert_equal(0, @entry.adaptor_find("actctag", ed_percent = 50).pos)
   end
 end