git.donarmstrong.com Git - biopieces.git/commitdiff
moved patternmatcher.rb
author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Thu, 24 May 2012 13:30:02 +0000 (13:30 +0000)
committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Thu, 24 May 2012 13:30:02 +0000 (13:30 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1826 74ccb610-7750-0410-82ae-013aeee3265d

code_ruby/lib/maasha/patternmatcher.rb [deleted file]
code_ruby/lib/maasha/seq.rb
code_ruby/lib/maasha/seq/patternmatcher.rb [new file with mode: 0644]
code_ruby/test/maasha/seq/test_patternmatcher.rb [new file with mode: 0755]
code_ruby/test/maasha/test_patternmatcher.rb [deleted file]

diff --git a/code_ruby/lib/maasha/patternmatcher.rb b/code_ruby/lib/maasha/patternmatcher.rb
deleted file mode 100644 (file)
index a07fd3a..0000000
+++ /dev/null
@@ -1,264 +0,0 @@
-# Copyright (C) 2007-2011 Martin A. Hansen.
-
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
-# of the License, or (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-# http://www.gnu.org/copyleft/gpl.html
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-# This software is part of the Biopieces framework (www.biopieces.org).
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-# IUPAC nucleotide pair ambiguity equivalents are saved in an
-# array of bit fields.
-
-BIT_A = 1 << 0 
-BIT_T = 1 << 1 
-BIT_C = 1 << 2 
-BIT_G = 1 << 3 
-
-EQUAL          = Array.new(256, 0)
-EQUAL['A'.ord] = BIT_A 
-EQUAL['a'.ord] = BIT_A 
-EQUAL['T'.ord] = BIT_T 
-EQUAL['t'.ord] = BIT_T 
-EQUAL['U'.ord] = BIT_T 
-EQUAL['u'.ord] = BIT_T 
-EQUAL['C'.ord] = BIT_C 
-EQUAL['c'.ord] = BIT_C 
-EQUAL['G'.ord] = BIT_G 
-EQUAL['g'.ord] = BIT_G 
-EQUAL['M'.ord] = (BIT_A|BIT_C)
-EQUAL['m'.ord] = (BIT_A|BIT_C)
-EQUAL['R'.ord] = (BIT_A|BIT_G)
-EQUAL['r'.ord] = (BIT_A|BIT_G)
-EQUAL['W'.ord] = (BIT_A|BIT_T)
-EQUAL['w'.ord] = (BIT_A|BIT_T)
-EQUAL['S'.ord] = (BIT_C|BIT_G)
-EQUAL['s'.ord] = (BIT_C|BIT_G)
-EQUAL['Y'.ord] = (BIT_C|BIT_T)
-EQUAL['y'.ord] = (BIT_C|BIT_T)
-EQUAL['K'.ord] = (BIT_G|BIT_T)
-EQUAL['k'.ord] = (BIT_G|BIT_T)
-EQUAL['B'.ord] = (BIT_C|BIT_G|BIT_T)
-EQUAL['b'.ord] = (BIT_C|BIT_G|BIT_T)
-EQUAL['D'.ord] = (BIT_A|BIT_G|BIT_T)
-EQUAL['d'.ord] = (BIT_A|BIT_G|BIT_T)
-EQUAL['H'.ord] = (BIT_A|BIT_C|BIT_T)
-EQUAL['h'.ord] = (BIT_A|BIT_C|BIT_T)
-EQUAL['V'.ord] = (BIT_A|BIT_C|BIT_G)
-EQUAL['v'.ord] = (BIT_A|BIT_C|BIT_G)
-EQUAL['N'.ord] = (BIT_A|BIT_C|BIT_G|BIT_T)
-EQUAL['n'.ord] = (BIT_A|BIT_C|BIT_G|BIT_T)
-
-# Module containing code to locate nucleotide patterns in sequences allowing for
-# ambiguity codes and a given maximum edit distance.
-# Insertions are nucleotides found in the pattern but not in the sequence.
-# Deletions are nucleotides found in the sequence but not in the pattern.
-#
-# Inspired by the paper by Bruno Woltzenlogel Paleo (page 197):
-# http://www.logic.at/people/bruno/Papers/2007-GATE-ESSLLI.pdf
-module PatternMatcher
-  # ------------------------------------------------------------------------------
-  #   str.match(pattern[, pos[, max_edit_distance]])
-  #   -> Match or nil
-  #
-  # ------------------------------------------------------------------------------
-  # Method to locate the next pattern match starting from a given position. A match
-  # is allowed to contain a given maximum edit distance. If a match is located a 
-  # Match object will be returned otherwise nil.
-  def match(pattern, pos = 0, max_edit_distance = 0)
-    vector = Vector.new(@seq, pattern, max_edit_distance)
-
-    while pos < @seq.length
-      vector.update(pos)
-
-      return vector.to_match(pos) if vector.match_found?
-
-      pos += 1
-    end
-
-    nil   # no match
-  end
-
-  # ------------------------------------------------------------------------------
-  #   str.scan(pattern[, pos[, max_edit_distance]])
-  #   -> Array
-  #   str.scan(pattern[, pos[, max_edit_distance]]) { |match|
-  #     block
-  #   }
-  #   -> Match
-  #
-  # ------------------------------------------------------------------------------
-  # Method to iterate through a sequence to locate pattern matches starting
-  # from a given position and allowing for a maximum edit distance.
-  # Matches found in block context return the Match object. Otherwise matches are
-  # returned in an Array.
-  def scan(pattern, pos = 0, max_edit_distance = 0)
-    matches = []
-
-    while match = match(pattern, pos, max_edit_distance)
-      if block_given?
-        yield match
-      else
-        matches << match
-      end
-
-      pos = match.pos + 1
-    end
-
-    return matches unless block_given?
-  end
-end
-
-# Class containing the score vector used for locating matches.
-class Vector
-  # Method to initialize the score vector.
-  def initialize(seq, pattern, max_edit_distance)
-    @seq               = seq
-    @pattern           = pattern
-    @max_edit_distance = max_edit_distance
-    @vector            = []
-
-    (0 ... @pattern.length + 1).each do |i|
-      @vector[i] = Score.new(matches = 0, mismatches = 0, insertions = i, deletions = 0, edit_distance = i)
-    end
-  end
-
-  # Method to update the score vector.
-  def update(pos)
-    score_diag = @vector[0]
-    score_up   = Score.new  # insertion
-    score_left = @vector[1] # deletion
-
-    (0 ... @pattern.length).each do |i|
-      if match?(@seq[pos], @pattern[i])
-        new_score = score_diag.dup
-        new_score.matches += 1
-      else
-        if deletion?(score_diag, score_up, score_left)
-          new_score = score_left.dup
-          new_score.deletions += 1
-        elsif mismatch?(score_diag, score_up, score_left)
-          new_score = score_diag.dup
-          new_score.mismatches += 1
-        elsif insertion?(score_diag, score_up, score_left)
-          new_score = score_up.dup
-          new_score.insertions += 1
-        end
-
-        new_score.edit_distance += 1
-      end
-
-      score_diag = @vector[i + 1]
-      score_up   = new_score
-      score_left = @vector[i + 2]
-
-      @vector[i + 1] = new_score
-    end
-  end
-
-  # Method that determines if a match was found by analyzing the score vector.
-  def match_found?
-    if @vector.last.edit_distance <= @max_edit_distance
-      true
-    end
-  end
-
-  # Method that returns a Match object initialized with
-  # information from the score vector.
-  def to_match(pos)
-    matches    = @vector.last.matches
-    mismatches = @vector.last.mismatches
-    insertions = @vector.last.insertions
-    deletions  = @vector.last.deletions
-    length     = @pattern.length - insertions + deletions
-    offset     = pos - length + 1
-    match      = @seq[offset ... offset + length]
-
-    Match.new(offset, match, matches, mismatches, insertions, deletions, length)
-  end
-
-  # Method to convert the score vector to a string.
-  def to_s
-    "(m,m,i,d,e)\n" + @vector.join("\n") + "\n\n"
-  end
-
-  private
-
-  # Method to determine if a match occurred.
-  def match?(char1, char2)
-    (EQUAL[char1.ord] & EQUAL[char2.ord]) != 0
-  end
-
-  # Method to determine if a mismatch occurred.
-  def mismatch?(score_diag, score_up, score_left)
-    if score_diag.edit_distance <= score_up.edit_distance and
-      score_diag.edit_distance <= score_left.edit_distance
-      true
-    end
-  end
-
-  # Method to determine if an insertion occurred.
-  def insertion?(score_diag, score_up, score_left)
-    if score_up.edit_distance <= score_diag.edit_distance and
-      score_up.edit_distance <= score_left.edit_distance
-      true
-    end
-  end
-
-  # Method to determine if a deletion occurred.
-  def deletion?(score_diag, score_up, score_left)
-    if score_left.edit_distance <= score_diag.edit_distance and
-      score_left.edit_distance <= score_up.edit_distance
-      true
-    end
-  end
-end
-
-# Class to instantiate Score objects that holds score information.
-class Score
-  attr_accessor :matches, :mismatches, :insertions, :deletions, :edit_distance
-
-  def initialize(matches = 0, mismatches = 0, insertions = 0, deletions = 0, edit_distance = 0)
-    @matches       = matches
-    @mismatches    = mismatches
-    @insertions    = insertions
-    @deletions     = deletions
-    @edit_distance = edit_distance
-  end
-
-  def to_s
-    "(#{[self.matches, self.mismatches, self.insertions, self.deletions, self.edit_distance].join(',')})"
-  end
-end
-
-# Class for creating Match objects which contain the description of a
-# match between a nucleotide sequence and a pattern.
-class Match
-  attr_reader :pos, :match, :matches, :mismatches, :insertions, :deletions, :edit_distance, :length
-
-  def initialize(pos, match, matches, mismatches, insertions, deletions, length)
-    @pos           = pos
-    @match         = match
-    @matches       = matches
-    @mismatches    = mismatches
-    @insertions    = insertions
-    @deletions     = deletions
-    @edit_distance = mismatches + insertions + deletions
-    @length        = length
-  end
-end
index b45540bace7491a881eded44e440f13c07c43b7c..3c1c156b9e804edf3cc636a233ca8a085c37f92a 100644 (file)
 
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
-require 'maasha/patternmatcher'
 require 'maasha/bits'
 require 'maasha/backtrack'
 require 'maasha/seq/digest'
+require 'maasha/seq/patternmatcher'
 require 'maasha/seq/trim'
 require 'narray'
 #require 'maasha/patscan'
diff --git a/code_ruby/lib/maasha/seq/patternmatcher.rb b/code_ruby/lib/maasha/seq/patternmatcher.rb
new file mode 100644 (file)
index 0000000..a07fd3a
--- /dev/null
@@ -0,0 +1,264 @@
+# Copyright (C) 2007-2011 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This software is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# IUPAC nucleotide ambiguity codes are saved in EQUAL, an array of bit fields
+# indexed by character code; two characters are equivalent if they share a bit.
+
+BIT_A = 1 << 0 
+BIT_T = 1 << 1 # shared by T/t and U/u below
+BIT_C = 1 << 2 
+BIT_G = 1 << 3 
+
+EQUAL          = Array.new(256, 0) # characters not listed below keep 0 and match nothing
+EQUAL['A'.ord] = BIT_A 
+EQUAL['a'.ord] = BIT_A 
+EQUAL['T'.ord] = BIT_T 
+EQUAL['t'.ord] = BIT_T 
+EQUAL['U'.ord] = BIT_T 
+EQUAL['u'.ord] = BIT_T 
+EQUAL['C'.ord] = BIT_C 
+EQUAL['c'.ord] = BIT_C 
+EQUAL['G'.ord] = BIT_G 
+EQUAL['g'.ord] = BIT_G 
+EQUAL['M'.ord] = (BIT_A|BIT_C)
+EQUAL['m'.ord] = (BIT_A|BIT_C)
+EQUAL['R'.ord] = (BIT_A|BIT_G)
+EQUAL['r'.ord] = (BIT_A|BIT_G)
+EQUAL['W'.ord] = (BIT_A|BIT_T)
+EQUAL['w'.ord] = (BIT_A|BIT_T)
+EQUAL['S'.ord] = (BIT_C|BIT_G)
+EQUAL['s'.ord] = (BIT_C|BIT_G)
+EQUAL['Y'.ord] = (BIT_C|BIT_T)
+EQUAL['y'.ord] = (BIT_C|BIT_T)
+EQUAL['K'.ord] = (BIT_G|BIT_T)
+EQUAL['k'.ord] = (BIT_G|BIT_T)
+EQUAL['B'.ord] = (BIT_C|BIT_G|BIT_T)
+EQUAL['b'.ord] = (BIT_C|BIT_G|BIT_T)
+EQUAL['D'.ord] = (BIT_A|BIT_G|BIT_T)
+EQUAL['d'.ord] = (BIT_A|BIT_G|BIT_T)
+EQUAL['H'.ord] = (BIT_A|BIT_C|BIT_T)
+EQUAL['h'.ord] = (BIT_A|BIT_C|BIT_T)
+EQUAL['V'.ord] = (BIT_A|BIT_C|BIT_G)
+EQUAL['v'.ord] = (BIT_A|BIT_C|BIT_G)
+EQUAL['N'.ord] = (BIT_A|BIT_C|BIT_G|BIT_T)
+EQUAL['n'.ord] = (BIT_A|BIT_C|BIT_G|BIT_T)
+
+# Module containing code to locate nucleotide patterns in sequences allowing for
+# ambiguity codes and a given maximum edit distance.
+# Insertions are nucleotides found in the pattern but not in the sequence.
+# Deletions are nucleotides found in the sequence but not in the pattern.
+# The methods read @seq, so the including object must hold the sequence there.
+# Inspired by the paper by Bruno Woltzenlogel Paleo (page 197):
+# http://www.logic.at/people/bruno/Papers/2007-GATE-ESSLLI.pdf
+module PatternMatcher
+  # ------------------------------------------------------------------------------
+  #   str.match(pattern[, pos[, max_edit_distance]])
+  #   -> Match or nil
+  #   (pos and max_edit_distance both default to 0)
+  # ------------------------------------------------------------------------------
+  # Method to locate the next pattern match scanning forward from a given position.
+  # The first position where the edit distance is at most max_edit_distance yields
+  # a Match object; if the end of the sequence is reached nil is returned.
+  def match(pattern, pos = 0, max_edit_distance = 0)
+    vector = Vector.new(@seq, pattern, max_edit_distance)
+
+    while pos < @seq.length
+      vector.update(pos) # fold the residue at pos into the score vector
+
+      return vector.to_match(pos) if vector.match_found?
+
+      pos += 1
+    end
+
+    nil   # no match
+  end
+
+  # ------------------------------------------------------------------------------
+  #   str.scan(pattern[, pos[, max_edit_distance]])
+  #   -> Array
+  #   str.scan(pattern[, pos[, max_edit_distance]]) { |match|
+  #     block
+  #   }
+  #   -> nil
+  #
+  # ------------------------------------------------------------------------------
+  # Method to iterate through a sequence to locate pattern matches starting
+  # from a given position and allowing for a maximum edit distance.
+  # With a block each Match is yielded and nil is returned. Otherwise the matches
+  # are returned in an Array.
+  def scan(pattern, pos = 0, max_edit_distance = 0)
+    matches = []
+
+    while match = match(pattern, pos, max_edit_distance) # match(...) with arguments calls the method
+      if block_given?
+        yield match
+      else
+        matches << match
+      end
+
+      pos = match.pos + 1 # resume one position after the start of the previous match
+    end
+
+    return matches unless block_given? # evaluates to nil when a block was given
+  end
+end
+
+# Class containing the score vector (pattern.length + 1 Score entries) used for locating matches.
+class Vector
+  # Method to initialize the score vector: entry i starts with i insertions and edit distance i.
+  def initialize(seq, pattern, max_edit_distance)
+    @seq               = seq
+    @pattern           = pattern
+    @max_edit_distance = max_edit_distance
+    @vector            = []
+
+    (0 ... @pattern.length + 1).each do |i|
+      @vector[i] = Score.new(matches = 0, mismatches = 0, insertions = i, deletions = 0, edit_distance = i) # positional args; the assignments merely label the values
+    end
+  end
+
+  # Method to update the score vector in place with the sequence residue at pos.
+  def update(pos)
+    score_diag = @vector[0]
+    score_up   = Score.new  # insertion
+    score_left = @vector[1] # deletion
+
+    (0 ... @pattern.length).each do |i|
+      if match?(@seq[pos], @pattern[i])
+        new_score = score_diag.dup
+        new_score.matches += 1
+      else
+        if deletion?(score_diag, score_up, score_left) # ties are resolved in the order deletion, mismatch, insertion
+          new_score = score_left.dup
+          new_score.deletions += 1
+        elsif mismatch?(score_diag, score_up, score_left)
+          new_score = score_diag.dup
+          new_score.mismatches += 1
+        elsif insertion?(score_diag, score_up, score_left)
+          new_score = score_up.dup
+          new_score.insertions += 1
+        end
+
+        new_score.edit_distance += 1
+      end
+
+      score_diag = @vector[i + 1] # old value, read before it is overwritten below
+      score_up   = new_score
+      score_left = @vector[i + 2] # nil after the last pattern position; not read again
+
+      @vector[i + 1] = new_score
+    end
+  end
+
+  # Method that determines if a match was found: true if the last score is within the maximum edit distance, otherwise nil.
+  def match_found?
+    if @vector.last.edit_distance <= @max_edit_distance
+      true
+    end
+  end
+
+  # Method that returns a Match object initialized with information from
+  # the last score; pos is taken as the sequence index of the last residue of the match.
+  def to_match(pos)
+    matches    = @vector.last.matches
+    mismatches = @vector.last.mismatches
+    insertions = @vector.last.insertions
+    deletions  = @vector.last.deletions
+    length     = @pattern.length - insertions + deletions # number of sequence residues covered by the match
+    offset     = pos - length + 1
+    match      = @seq[offset ... offset + length]
+
+    Match.new(offset, match, matches, mismatches, insertions, deletions, length)
+  end
+
+  # Method to convert the score vector to a string: a header line followed by one Score per line.
+  def to_s
+    "(m,m,i,d,e)\n" + @vector.join("\n") + "\n\n"
+  end
+
+  private
+
+  # Method to determine if two characters match, i.e. their EQUAL bit fields share a bit.
+  def match?(char1, char2)
+    (EQUAL[char1.ord] & EQUAL[char2.ord]) != 0
+  end
+
+  # Method to determine if a mismatch occurred: true if score_diag has the lowest edit distance (ties included), otherwise nil.
+  def mismatch?(score_diag, score_up, score_left)
+    if score_diag.edit_distance <= score_up.edit_distance and
+      score_diag.edit_distance <= score_left.edit_distance
+      true
+    end
+  end
+
+  # Method to determine if an insertion occurred: true if score_up has the lowest edit distance (ties included), otherwise nil.
+  def insertion?(score_diag, score_up, score_left)
+    if score_up.edit_distance <= score_diag.edit_distance and
+      score_up.edit_distance <= score_left.edit_distance
+      true
+    end
+  end
+
+  # Method to determine if a deletion occurred: true if score_left has the lowest edit distance (ties included), otherwise nil.
+  def deletion?(score_diag, score_up, score_left)
+    if score_left.edit_distance <= score_diag.edit_distance and
+      score_left.edit_distance <= score_up.edit_distance
+      true
+    end
+  end
+end
+
+# Class to instantiate Score objects that hold the counts of matches, mismatches, insertions and deletions plus the edit distance.
+class Score
+  attr_accessor :matches, :mismatches, :insertions, :deletions, :edit_distance
+
+  def initialize(matches = 0, mismatches = 0, insertions = 0, deletions = 0, edit_distance = 0)
+    @matches       = matches
+    @mismatches    = mismatches
+    @insertions    = insertions
+    @deletions     = deletions
+    @edit_distance = edit_distance
+  end
+
+  def to_s # renders as (matches,mismatches,insertions,deletions,edit_distance)
+    "(#{[self.matches, self.mismatches, self.insertions, self.deletions, self.edit_distance].join(',')})"
+  end
+end
+
+# Class for creating read-only Match objects which contain the description of a
+# match between a nucleotide sequence and a pattern.
+class Match
+  attr_reader :pos, :match, :matches, :mismatches, :insertions, :deletions, :edit_distance, :length
+
+  def initialize(pos, match, matches, mismatches, insertions, deletions, length)
+    @pos           = pos
+    @match         = match
+    @matches       = matches
+    @mismatches    = mismatches
+    @insertions    = insertions
+    @deletions     = deletions
+    @edit_distance = mismatches + insertions + deletions # derived here rather than passed in
+    @length        = length
+  end
+end
diff --git a/code_ruby/test/maasha/seq/test_patternmatcher.rb b/code_ruby/test/maasha/seq/test_patternmatcher.rb
new file mode 100755 (executable)
index 0000000..9a7a50e
--- /dev/null
@@ -0,0 +1,136 @@
+#!/usr/bin/env ruby
+$:.unshift File.join(File.dirname(__FILE__),'..','lib')
+
+# Copyright (C) 2007-2010 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This software is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+require 'maasha/seq'
+require 'test/unit'
+require 'pp'
+
+class TestPatternMatcher < Test::Unit::TestCase # exercises match and scan on Seq objects
+  def setup
+    @p = Seq.new("test", "atcg") # Seq comes from maasha/seq; presumably (name, sequence) -- TODO confirm
+  end
+
+  def test_PatternMatcher_no_match_returns_nil
+    assert_nil(@p.match("gggg"))
+  end
+
+  def test_PatternMatcher_match_perfect_returns_correctly
+    m = @p.match("atcg")
+    assert_equal(0, m.pos)
+    assert_equal("atcg", m.match)
+    assert_equal(4, m.matches)
+    assert_equal(0, m.mismatches)
+    assert_equal(0, m.insertions)
+    assert_equal(0, m.deletions)
+    assert_equal(4, m.length)
+  end
+
+  def test_PatternMatcher_match_perfect_with_ambiguity_codes_returns_correctly
+    m = @p.match("nnnn")
+    assert_equal(0, m.pos)
+    assert_equal("atcg", m.match)
+    assert_equal(4, m.matches)
+    assert_equal(0, m.mismatches)
+    assert_equal(0, m.insertions)
+    assert_equal(0, m.deletions)
+    assert_equal(4, m.length)
+  end
+
+  def test_PatternMatcher_match_with_one_mismatch_and_edit_dist_zero_returns_nil
+    assert_nil(@p.match("aCcg"))
+  end
+
+  def test_PatternMatcher_match_with_one_mismatch_and_edit_dist_one_returns_correctly
+    m = @p.match("aCcg", pos = 0, edit_distance = 1) # arguments are positional; the names only label them
+    assert_equal(0, m.pos)
+    assert_equal("atcg", m.match)
+    assert_equal(3, m.matches)
+    assert_equal(1, m.mismatches)
+    assert_equal(0, m.insertions)
+    assert_equal(0, m.deletions)
+    assert_equal(4, m.length)
+  end
+
+  def test_PatternMatcher_match_with_two_mismatch_and_edit_dist_one_returns_nil
+    assert_nil(@p.match("aGcA", pos = 0, edit_distance = 1))
+  end
+
+  def test_PatternMatcher_match_with_one_insertion_and_edit_dist_zero_returns_nil
+    assert_nil(@p.match("atGcg"))
+  end
+
+  def test_PatternMatcher_match_with_one_insertion_and_edit_dist_one_returns_correctly
+    m = @p.match("atGcg", pos = 0, edit_distance = 1) # the extra G exists only in the pattern
+    assert_equal(0, m.pos)
+    assert_equal("atcg", m.match)
+    assert_equal(4, m.matches)
+    assert_equal(0, m.mismatches)
+    assert_equal(1, m.insertions)
+    assert_equal(0, m.deletions)
+    assert_equal(4, m.length)
+  end
+
+  def test_PatternMatcher_match_with_two_insertions_and_edit_dist_one_returns_nil
+    assert_nil(@p.match("atGcTg", pos = 0, edit_distance = 1))
+  end
+
+  def test_PatternMatcher_match_with_two_insertions_and_edit_dist_two_returns_correctly
+    m = @p.match("atGcTg", pos = 0, edit_distance = 2)
+    assert_equal(0, m.pos)
+    assert_equal("atcg", m.match)
+    assert_equal(4, m.matches)
+    assert_equal(0, m.mismatches)
+    assert_equal(2, m.insertions)
+    assert_equal(0, m.deletions)
+    assert_equal(4, m.length)
+  end
+
+  def test_PatternMatcher_match_with_one_deletion_and_edit_distance_zero_returns_nil
+    assert_nil(@p.match("acg"))
+  end
+
+  def test_PatternMatcher_match_with_one_deletion_and_edit_distance_one_returns_correctly
+    m = @p.match("acg", pos = 0, edit_distance = 1) # the t of the sequence is absent from the pattern
+    assert_equal(0, m.pos)
+    assert_equal("atcg", m.match)
+    assert_equal(3, m.matches)
+    assert_equal(0, m.mismatches)
+    assert_equal(0, m.insertions)
+    assert_equal(1, m.deletions)
+    assert_equal(4, m.length)
+  end
+
+  def test_PatternMatcher_scan_locates_three_patterns_ok
+    p = Seq.new("test", "ataacgagctagctagctagctgactac")
+    assert_equal(3, p.scan("tag").count) # "tag" occurs at indexes 9, 13 and 17
+  end
+
+  def test_PatternMatcher_scan_with_pos_locates_two_patterns_ok
+    p = Seq.new("test", "ataacgagctagctagctagctgactac")
+    assert_equal(2, p.scan("tag", 10).count) # starting at index 10 skips the hit at index 9
+  end
+end
diff --git a/code_ruby/test/maasha/test_patternmatcher.rb b/code_ruby/test/maasha/test_patternmatcher.rb
deleted file mode 100755 (executable)
index 9a7a50e..0000000
+++ /dev/null
@@ -1,136 +0,0 @@
-#!/usr/bin/env ruby
-$:.unshift File.join(File.dirname(__FILE__),'..','lib')
-
-# Copyright (C) 2007-2010 Martin A. Hansen.
-
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
-# of the License, or (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-# http://www.gnu.org/copyleft/gpl.html
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-# This software is part of the Biopieces framework (www.biopieces.org).
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-require 'maasha/seq'
-require 'test/unit'
-require 'pp'
-
-class TestPatternMatcher < Test::Unit::TestCase
-  def setup
-    @p = Seq.new("test", "atcg")
-  end
-
-  def test_PatternMatcher_no_match_returns_nil
-    assert_nil(@p.match("gggg"))
-  end
-
-  def test_PatternMatcher_match_perfect_returns_correctly
-    m = @p.match("atcg")
-    assert_equal(0, m.pos)
-    assert_equal("atcg", m.match)
-    assert_equal(4, m.matches)
-    assert_equal(0, m.mismatches)
-    assert_equal(0, m.insertions)
-    assert_equal(0, m.deletions)
-    assert_equal(4, m.length)
-  end
-
-  def test_PatternMatcher_match_perfect_with_ambiguity_codes_returns_correctly
-    m = @p.match("nnnn")
-    assert_equal(0, m.pos)
-    assert_equal("atcg", m.match)
-    assert_equal(4, m.matches)
-    assert_equal(0, m.mismatches)
-    assert_equal(0, m.insertions)
-    assert_equal(0, m.deletions)
-    assert_equal(4, m.length)
-  end
-
-  def test_PatternMatcher_match_with_one_mismatch_and_edit_dist_zero_returns_nil
-    assert_nil(@p.match("aCcg"))
-  end
-
-  def test_PatternMatcher_match_with_one_mismatch_and_edit_dist_one_returns_correctly
-    m = @p.match("aCcg", pos = 0, edit_distance = 1)
-    assert_equal(0, m.pos)
-    assert_equal("atcg", m.match)
-    assert_equal(3, m.matches)
-    assert_equal(1, m.mismatches)
-    assert_equal(0, m.insertions)
-    assert_equal(0, m.deletions)
-    assert_equal(4, m.length)
-  end
-
-  def test_PatternMatcher_match_with_two_mismatch_and_edit_dist_one_returns_nil
-    assert_nil(@p.match("aGcA", pos = 0, edit_distance = 1))
-  end
-
-  def test_PatternMatcher_match_with_one_insertion_and_edit_dist_zero_returns_nil
-    assert_nil(@p.match("atGcg"))
-  end
-
-  def test_PatternMatcher_match_with_one_insertion_and_edit_dist_one_returns_correctly
-    m = @p.match("atGcg", pos = 0, edit_distance = 1)
-    assert_equal(0, m.pos)
-    assert_equal("atcg", m.match)
-    assert_equal(4, m.matches)
-    assert_equal(0, m.mismatches)
-    assert_equal(1, m.insertions)
-    assert_equal(0, m.deletions)
-    assert_equal(4, m.length)
-  end
-
-  def test_PatternMatcher_match_with_two_insertions_and_edit_dist_one_returns_nil
-    assert_nil(@p.match("atGcTg", pos = 0, edit_distance = 1))
-  end
-
-  def test_PatternMatcher_match_with_two_insertions_and_edit_dist_two_returns_correctly
-    m = @p.match("atGcTg", pos = 0, edit_distance = 2)
-    assert_equal(0, m.pos)
-    assert_equal("atcg", m.match)
-    assert_equal(4, m.matches)
-    assert_equal(0, m.mismatches)
-    assert_equal(2, m.insertions)
-    assert_equal(0, m.deletions)
-    assert_equal(4, m.length)
-  end
-
-  def test_PatternMatcher_match_with_one_deletion_and_edit_distance_zero_returns_nil
-    assert_nil(@p.match("acg"))
-  end
-
-  def test_PatternMatcher_match_with_one_deletion_and_edit_distance_one_returns_correctly
-    m = @p.match("acg", pos = 0, edit_distance = 1)
-    assert_equal(0, m.pos)
-    assert_equal("atcg", m.match)
-    assert_equal(3, m.matches)
-    assert_equal(0, m.mismatches)
-    assert_equal(0, m.insertions)
-    assert_equal(1, m.deletions)
-    assert_equal(4, m.length)
-  end
-
-  def test_PatternMatcher_scan_locates_three_patterns_ok
-    p = Seq.new("test", "ataacgagctagctagctagctgactac")
-    assert_equal(3, p.scan("tag").count)
-  end
-
-  def test_PatternMatcher_scan_with_pos_locates_two_patterns_ok
-    p = Seq.new("test", "ataacgagctagctagctagctgactac")
-    assert_equal(2, p.scan("tag", 10).count)
-  end
-end