+++ /dev/null
-# Copyright (C) 2007-2011 Martin A. Hansen.
-
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
-# of the License, or (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-# http://www.gnu.org/copyleft/gpl.html
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-# This software is part of the Biopieces framework (www.biopieces.org).
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-# IUPAC nucleotide pair ambiguity equivalents are saved in an
-# array of bit fields.
-
-BIT_A = 1 << 0
-BIT_T = 1 << 1
-BIT_C = 1 << 2
-BIT_G = 1 << 3
-
-EQUAL = Array.new(256, 0)
-EQUAL['A'.ord] = BIT_A
-EQUAL['a'.ord] = BIT_A
-EQUAL['T'.ord] = BIT_T
-EQUAL['t'.ord] = BIT_T
-EQUAL['U'.ord] = BIT_T
-EQUAL['u'.ord] = BIT_T
-EQUAL['C'.ord] = BIT_C
-EQUAL['c'.ord] = BIT_C
-EQUAL['G'.ord] = BIT_G
-EQUAL['g'.ord] = BIT_G
-EQUAL['M'.ord] = (BIT_A|BIT_C)
-EQUAL['m'.ord] = (BIT_A|BIT_C)
-EQUAL['R'.ord] = (BIT_A|BIT_G)
-EQUAL['r'.ord] = (BIT_A|BIT_G)
-EQUAL['W'.ord] = (BIT_A|BIT_T)
-EQUAL['w'.ord] = (BIT_A|BIT_T)
-EQUAL['S'.ord] = (BIT_C|BIT_G)
-EQUAL['s'.ord] = (BIT_C|BIT_G)
-EQUAL['Y'.ord] = (BIT_C|BIT_T)
-EQUAL['y'.ord] = (BIT_C|BIT_T)
-EQUAL['K'.ord] = (BIT_G|BIT_T)
-EQUAL['k'.ord] = (BIT_G|BIT_T)
-EQUAL['B'.ord] = (BIT_C|BIT_G|BIT_T)
-EQUAL['b'.ord] = (BIT_C|BIT_G|BIT_T)
-EQUAL['D'.ord] = (BIT_A|BIT_G|BIT_T)
-EQUAL['d'.ord] = (BIT_A|BIT_G|BIT_T)
-EQUAL['H'.ord] = (BIT_A|BIT_C|BIT_T)
-EQUAL['h'.ord] = (BIT_A|BIT_C|BIT_T)
-EQUAL['V'.ord] = (BIT_A|BIT_C|BIT_G)
-EQUAL['v'.ord] = (BIT_A|BIT_C|BIT_G)
-EQUAL['N'.ord] = (BIT_A|BIT_C|BIT_G|BIT_T)
-EQUAL['n'.ord] = (BIT_A|BIT_C|BIT_G|BIT_T)
-
-# Module containing code to locate nucleotide patterns in sequences allowing for
-# ambiguity codes and a given maximum edit distance.
-# Insertions are nucleotides found in the pattern but not in the sequence.
-# Deletions are nucleotides found in the sequence but not in the pattern.
-#
-# Inspired by the paper by Bruno Woltzenlogel Paleo (page 197):
-# http://www.logic.at/people/bruno/Papers/2007-GATE-ESSLLI.pdf
-module PatternMatcher
- # ------------------------------------------------------------------------------
- # str.match(pattern[, pos[, max_edit_distance]])
- # -> Match or nil
- #
- # ------------------------------------------------------------------------------
- # Method to locate the next pattern match starting from a given position. A match
- # is allowed to contain a given maximum edit distance. If a match is located a
- # Match object will be returned otherwise nil.
- def match(pattern, pos = 0, max_edit_distance = 0)
- vector = Vector.new(@seq, pattern, max_edit_distance)
-
- while pos < @seq.length
- vector.update(pos)
-
- return vector.to_match(pos) if vector.match_found?
-
- pos += 1
- end
-
- nil # no match
- end
-
- # ------------------------------------------------------------------------------
- # str.scan(pattern[, pos[, max_edit_distance]])
- # -> Array
- # str.scan(pattern[, pos[, max_edit_distance]]) { |match|
- # block
- # }
- # -> Match
- #
- # ------------------------------------------------------------------------------
- # Method to iterate through a sequence to locate pattern matches starting
- # from a given position and allowing for a maximum edit distance.
- # Matches found in block context return the Match object. Otherwise matches are
- # returned in an Array.
- def scan(pattern, pos = 0, max_edit_distance = 0)
- matches = []
-
- while match = match(pattern, pos, max_edit_distance)
- if block_given?
- yield match
- else
- matches << match
- end
-
- pos = match.pos + 1
- end
-
- return matches unless block_given?
- end
-end
-
-# Class containing the score vector used for locating matches.
-class Vector
- # Method to initailize the score vector.
- def initialize(seq, pattern, max_edit_distance)
- @seq = seq
- @pattern = pattern
- @max_edit_distance = max_edit_distance
- @vector = []
-
- (0 ... @pattern.length + 1).each do |i|
- @vector[i] = Score.new(matches = 0, mismatches = 0, insertions = i, deletions = 0, edit_distance = i)
- end
- end
-
- # Method to update the score vector.
- def update(pos)
- score_diag = @vector[0]
- score_up = Score.new # insertion
- score_left = @vector[1] # deletion
-
- (0 ... @pattern.length).each do |i|
- if match?(@seq[pos], @pattern[i])
- new_score = score_diag.dup
- new_score.matches += 1
- else
- if deletion?(score_diag, score_up, score_left)
- new_score = score_left.dup
- new_score.deletions += 1
- elsif mismatch?(score_diag, score_up, score_left)
- new_score = score_diag.dup
- new_score.mismatches += 1
- elsif insertion?(score_diag, score_up, score_left)
- new_score = score_up.dup
- new_score.insertions += 1
- end
-
- new_score.edit_distance += 1
- end
-
- score_diag = @vector[i + 1]
- score_up = new_score
- score_left = @vector[i + 2]
-
- @vector[i + 1] = new_score
- end
- end
-
- # Method that determines if a match was found by analyzing the score vector.
- def match_found?
- if @vector.last.edit_distance <= @max_edit_distance
- true
- end
- end
-
- # Method that returns a Match object initialized with
- # information from the score vector.
- def to_match(pos)
- matches = @vector.last.matches
- mismatches = @vector.last.mismatches
- insertions = @vector.last.insertions
- deletions = @vector.last.deletions
- length = @pattern.length - insertions + deletions
- offset = pos - length + 1
- match = @seq[offset ... offset + length]
-
- Match.new(offset, match, matches, mismatches, insertions, deletions, length)
- end
-
- # Method to convert the score vector to a string.
- def to_s
- "(m,m,i,d,e)\n" + @vector.join("\n") + "\n\n"
- end
-
- private
-
- # Method to determine if a match occurred.
- def match?(char1, char2)
- (EQUAL[char1.ord] & EQUAL[char2.ord]) != 0
- end
-
- # Method to determine if a mismatch occured.
- def mismatch?(score_diag, score_up, score_left)
- if score_diag.edit_distance <= score_up.edit_distance and
- score_diag.edit_distance <= score_left.edit_distance
- true
- end
- end
-
- # Method to determine if an insertion occured.
- def insertion?(score_diag, score_up, score_left)
- if score_up.edit_distance <= score_diag.edit_distance and
- score_up.edit_distance <= score_left.edit_distance
- true
- end
- end
-
- # Method to determine if a deletion occured.
- def deletion?(score_diag, score_up, score_left)
- if score_left.edit_distance <= score_diag.edit_distance and
- score_left.edit_distance <= score_up.edit_distance
- true
- end
- end
-end
-
-# Class to instantiate Score objects that holds score information.
-class Score
- attr_accessor :matches, :mismatches, :insertions, :deletions, :edit_distance
-
- def initialize(matches = 0, mismatches = 0, insertions = 0, deletions = 0, edit_distance = 0)
- @matches = matches
- @mismatches = mismatches
- @insertions = insertions
- @deletions = deletions
- @edit_distance = edit_distance
- end
-
- def to_s
- "(#{[self.matches, self.mismatches, self.insertions, self.deletions, self.edit_distance].join(',')})"
- end
-end
-
-# Class for creating Match objects which contain the description of a
-# match between a nucleotide sequence and a pattern.
-class Match
- attr_reader :pos, :match, :matches, :mismatches, :insertions, :deletions, :edit_distance, :length
-
- def initialize(pos, match, matches, mismatches, insertions, deletions, length)
- @pos = pos
- @match = match
- @matches = matches
- @mismatches = mismatches
- @insertions = insertions
- @deletions = deletions
- @edit_distance = mismatches + insertions + deletions
- @length = length
- end
-end
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-require 'maasha/patternmatcher'
require 'maasha/bits'
require 'maasha/backtrack'
require 'maasha/seq/digest'
+require 'maasha/seq/patternmatcher'
require 'maasha/seq/trim'
require 'narray'
#require 'maasha/patscan'
--- /dev/null
+# Copyright (C) 2007-2011 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This software is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# IUPAC nucleotide ambiguity codes are saved as bit fields in an array indexed by
+# character code (upper and lower case; U shares the T bit; unlisted codes are 0).
+
+BIT_A = 1 << 0
+BIT_T = 1 << 1
+BIT_C = 1 << 2
+BIT_G = 1 << 3
+
+EQUAL = Array.new(256, 0)
+EQUAL['A'.ord] = BIT_A
+EQUAL['a'.ord] = BIT_A
+EQUAL['T'.ord] = BIT_T
+EQUAL['t'.ord] = BIT_T
+EQUAL['U'.ord] = BIT_T
+EQUAL['u'.ord] = BIT_T
+EQUAL['C'.ord] = BIT_C
+EQUAL['c'.ord] = BIT_C
+EQUAL['G'.ord] = BIT_G
+EQUAL['g'.ord] = BIT_G
+EQUAL['M'.ord] = (BIT_A|BIT_C)
+EQUAL['m'.ord] = (BIT_A|BIT_C)
+EQUAL['R'.ord] = (BIT_A|BIT_G)
+EQUAL['r'.ord] = (BIT_A|BIT_G)
+EQUAL['W'.ord] = (BIT_A|BIT_T)
+EQUAL['w'.ord] = (BIT_A|BIT_T)
+EQUAL['S'.ord] = (BIT_C|BIT_G)
+EQUAL['s'.ord] = (BIT_C|BIT_G)
+EQUAL['Y'.ord] = (BIT_C|BIT_T)
+EQUAL['y'.ord] = (BIT_C|BIT_T)
+EQUAL['K'.ord] = (BIT_G|BIT_T)
+EQUAL['k'.ord] = (BIT_G|BIT_T)
+EQUAL['B'.ord] = (BIT_C|BIT_G|BIT_T)
+EQUAL['b'.ord] = (BIT_C|BIT_G|BIT_T)
+EQUAL['D'.ord] = (BIT_A|BIT_G|BIT_T)
+EQUAL['d'.ord] = (BIT_A|BIT_G|BIT_T)
+EQUAL['H'.ord] = (BIT_A|BIT_C|BIT_T)
+EQUAL['h'.ord] = (BIT_A|BIT_C|BIT_T)
+EQUAL['V'.ord] = (BIT_A|BIT_C|BIT_G)
+EQUAL['v'.ord] = (BIT_A|BIT_C|BIT_G)
+EQUAL['N'.ord] = (BIT_A|BIT_C|BIT_G|BIT_T)
+EQUAL['n'.ord] = (BIT_A|BIT_C|BIT_G|BIT_T)
+
+# Module containing code to locate nucleotide patterns in sequences allowing for
+# ambiguity codes and a given maximum edit distance. The sequence is read from @seq.
+# Insertions are nucleotides found in the pattern but not in the sequence.
+# Deletions are nucleotides found in the sequence but not in the pattern.
+#
+# Inspired by the paper by Bruno Woltzenlogel Paleo (page 197):
+# http://www.logic.at/people/bruno/Papers/2007-GATE-ESSLLI.pdf
+module PatternMatcher
+ # ------------------------------------------------------------------------------
+ # str.match(pattern[, pos[, max_edit_distance]])
+ # -> Match or nil
+ #
+ # ------------------------------------------------------------------------------
+ # Method to locate the next pattern match scanning from a given position (default 0).
+ # A match may contain up to max_edit_distance edits (default 0). A Match object is
+ # returned for the first position where the score vector reports a hit, otherwise nil.
+ def match(pattern, pos = 0, max_edit_distance = 0)
+ vector = Vector.new(@seq, pattern, max_edit_distance)
+
+ while pos < @seq.length
+ vector.update(pos)
+
+ return vector.to_match(pos) if vector.match_found?
+
+ pos += 1
+ end
+
+ nil # no match
+ end
+
+ # ------------------------------------------------------------------------------
+ # str.scan(pattern[, pos[, max_edit_distance]])
+ # -> Array
+ # str.scan(pattern[, pos[, max_edit_distance]]) { |match|
+ # block
+ # }
+ # -> nil
+ #
+ # ------------------------------------------------------------------------------
+ # Method to iterate through a sequence to locate pattern matches starting
+ # from a given position and allowing for a maximum edit distance. The search
+ # resumes at match.pos + 1 after each hit. In block context each Match object is
+ # yielded and nil is returned. Otherwise the matches are returned in an Array.
+ def scan(pattern, pos = 0, max_edit_distance = 0)
+ matches = []
+
+ while match = match(pattern, pos, max_edit_distance)
+ if block_given?
+ yield match
+ else
+ matches << match
+ end
+
+ pos = match.pos + 1
+ end
+
+ return matches unless block_given?
+ end
+end
+
+# Class containing the score vector (pattern.length + 1 Score objects) used for locating matches.
+class Vector
+ # Method to initialize the score vector: entry i starts with i insertions and edit distance i.
+ def initialize(seq, pattern, max_edit_distance)
+ @seq = seq
+ @pattern = pattern
+ @max_edit_distance = max_edit_distance
+ @vector = []
+
+ (0 ... @pattern.length + 1).each do |i|
+ @vector[i] = Score.new(matches = 0, mismatches = 0, insertions = i, deletions = 0, edit_distance = i)
+ end
+ end
+
+ # Method to update the score vector in place with the sequence character at pos.
+ def update(pos)
+ score_diag = @vector[0]
+ score_up = Score.new # insertion source; starts as an all-zero Score
+ score_left = @vector[1] # deletion source
+
+ (0 ... @pattern.length).each do |i|
+ if match?(@seq[pos], @pattern[i])
+ new_score = score_diag.dup
+ new_score.matches += 1
+ else
+ if deletion?(score_diag, score_up, score_left) # ties resolve as deletion, mismatch, insertion
+ new_score = score_left.dup
+ new_score.deletions += 1
+ elsif mismatch?(score_diag, score_up, score_left)
+ new_score = score_diag.dup
+ new_score.mismatches += 1
+ elsif insertion?(score_diag, score_up, score_left)
+ new_score = score_up.dup
+ new_score.insertions += 1
+ end
+
+ new_score.edit_distance += 1
+ end
+
+ score_diag = @vector[i + 1] # previous value, read before it is overwritten below
+ score_up = new_score
+ score_left = @vector[i + 2] # nil on the last iteration, where it is no longer read
+
+ @vector[i + 1] = new_score
+ end
+ end
+
+ # Method that determines if a match was found: true if the last score's edit distance is within the maximum, else nil.
+ def match_found?
+ if @vector.last.edit_distance <= @max_edit_distance
+ true
+ end
+ end
+
+ # Method that returns a Match object initialized with information from the last
+ # score; pos is treated as the final position of the match in the sequence.
+ def to_match(pos)
+ matches = @vector.last.matches
+ mismatches = @vector.last.mismatches
+ insertions = @vector.last.insertions
+ deletions = @vector.last.deletions
+ length = @pattern.length - insertions + deletions # number of sequence characters covered
+ offset = pos - length + 1
+ match = @seq[offset ... offset + length]
+
+ Match.new(offset, match, matches, mismatches, insertions, deletions, length)
+ end
+
+ # Method to convert the score vector to a string: a column header followed by one Score per line.
+ def to_s
+ "(m,m,i,d,e)\n" + @vector.join("\n") + "\n\n"
+ end
+
+ private
+
+ # Method to determine if two characters match, i.e. their EQUAL bit fields share a bit.
+ def match?(char1, char2)
+ (EQUAL[char1.ord] & EQUAL[char2.ord]) != 0
+ end
+
+ # Method to determine if a mismatch occurred: true if the diagonal score has the lowest (or tied) edit distance, else nil.
+ def mismatch?(score_diag, score_up, score_left)
+ if score_diag.edit_distance <= score_up.edit_distance and
+ score_diag.edit_distance <= score_left.edit_distance
+ true
+ end
+ end
+
+ # Method to determine if an insertion occurred: true if the up score has the lowest (or tied) edit distance, else nil.
+ def insertion?(score_diag, score_up, score_left)
+ if score_up.edit_distance <= score_diag.edit_distance and
+ score_up.edit_distance <= score_left.edit_distance
+ true
+ end
+ end
+
+ # Method to determine if a deletion occurred: true if the left score has the lowest (or tied) edit distance, else nil.
+ def deletion?(score_diag, score_up, score_left)
+ if score_left.edit_distance <= score_diag.edit_distance and
+ score_left.edit_distance <= score_up.edit_distance
+ true
+ end
+ end
+end
+
+# Class to instantiate Score objects that hold counts of matches, mismatches, insertions and deletions plus an edit distance (all default to 0).
+class Score
+ attr_accessor :matches, :mismatches, :insertions, :deletions, :edit_distance
+
+ def initialize(matches = 0, mismatches = 0, insertions = 0, deletions = 0, edit_distance = 0)
+ @matches = matches
+ @mismatches = mismatches
+ @insertions = insertions
+ @deletions = deletions
+ @edit_distance = edit_distance
+ end
+
+ def to_s # formats the five fields, in attr_accessor order, as a parenthesized comma separated list
+ "(#{[self.matches, self.mismatches, self.insertions, self.deletions, self.edit_distance].join(',')})"
+ end
+end
+
+# Class for creating read-only Match objects which contain the description of a
+# match between a nucleotide sequence and a pattern (position, matched string and edit counts).
+class Match
+ attr_reader :pos, :match, :matches, :mismatches, :insertions, :deletions, :edit_distance, :length
+
+ def initialize(pos, match, matches, mismatches, insertions, deletions, length)
+ @pos = pos
+ @match = match
+ @matches = matches
+ @mismatches = mismatches
+ @insertions = insertions
+ @deletions = deletions
+ @edit_distance = mismatches + insertions + deletions # derived from the counts, not passed in
+ @length = length
+ end
+end
--- /dev/null
+#!/usr/bin/env ruby
+$:.unshift File.join(File.dirname(__FILE__),'..','lib')
+
+# Copyright (C) 2007-2010 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This software is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+require 'maasha/seq'
+require 'test/unit'
+require 'pp'
+
+class TestPatternMatcher < Test::Unit::TestCase # Unit tests for match and scan called on Seq objects.
+ def setup
+ @p = Seq.new("test", "atcg") # shared four-base sequence used by the match tests
+ end
+
+ def test_PatternMatcher_no_match_returns_nil
+ assert_nil(@p.match("gggg"))
+ end
+
+ def test_PatternMatcher_match_perfect_returns_correctly
+ m = @p.match("atcg")
+ assert_equal(0, m.pos)
+ assert_equal("atcg", m.match)
+ assert_equal(4, m.matches)
+ assert_equal(0, m.mismatches)
+ assert_equal(0, m.insertions)
+ assert_equal(0, m.deletions)
+ assert_equal(4, m.length)
+ end
+
+ def test_PatternMatcher_match_perfect_with_ambiguity_codes_returns_correctly
+ m = @p.match("nnnn")
+ assert_equal(0, m.pos)
+ assert_equal("atcg", m.match)
+ assert_equal(4, m.matches)
+ assert_equal(0, m.mismatches)
+ assert_equal(0, m.insertions)
+ assert_equal(0, m.deletions)
+ assert_equal(4, m.length)
+ end
+
+ def test_PatternMatcher_match_with_one_mismatch_and_edit_dist_zero_returns_nil
+ assert_nil(@p.match("aCcg"))
+ end
+
+ def test_PatternMatcher_match_with_one_mismatch_and_edit_dist_one_returns_correctly
+ m = @p.match("aCcg", pos = 0, edit_distance = 1) # NOTE: positional arguments; the local assignments only act as labels
+ assert_equal(0, m.pos)
+ assert_equal("atcg", m.match)
+ assert_equal(3, m.matches)
+ assert_equal(1, m.mismatches)
+ assert_equal(0, m.insertions)
+ assert_equal(0, m.deletions)
+ assert_equal(4, m.length)
+ end
+
+ def test_PatternMatcher_match_with_two_mismatch_and_edit_dist_one_returns_nil
+ assert_nil(@p.match("aGcA", pos = 0, edit_distance = 1))
+ end
+
+ def test_PatternMatcher_match_with_one_insertion_and_edit_dist_zero_returns_nil
+ assert_nil(@p.match("atGcg"))
+ end
+
+ def test_PatternMatcher_match_with_one_insertion_and_edit_dist_one_returns_correctly
+ m = @p.match("atGcg", pos = 0, edit_distance = 1)
+ assert_equal(0, m.pos)
+ assert_equal("atcg", m.match)
+ assert_equal(4, m.matches)
+ assert_equal(0, m.mismatches)
+ assert_equal(1, m.insertions)
+ assert_equal(0, m.deletions)
+ assert_equal(4, m.length)
+ end
+
+ def test_PatternMatcher_match_with_two_insertions_and_edit_dist_one_returns_nil
+ assert_nil(@p.match("atGcTg", pos = 0, edit_distance = 1))
+ end
+
+ def test_PatternMatcher_match_with_two_insertions_and_edit_dist_two_returns_correctly
+ m = @p.match("atGcTg", pos = 0, edit_distance = 2)
+ assert_equal(0, m.pos)
+ assert_equal("atcg", m.match)
+ assert_equal(4, m.matches)
+ assert_equal(0, m.mismatches)
+ assert_equal(2, m.insertions)
+ assert_equal(0, m.deletions)
+ assert_equal(4, m.length)
+ end
+
+ def test_PatternMatcher_match_with_one_deletion_and_edit_distance_zero_returns_nil
+ assert_nil(@p.match("acg"))
+ end
+
+ def test_PatternMatcher_match_with_one_deletion_and_edit_distance_one_returns_correctly
+ m = @p.match("acg", pos = 0, edit_distance = 1)
+ assert_equal(0, m.pos)
+ assert_equal("atcg", m.match)
+ assert_equal(3, m.matches)
+ assert_equal(0, m.mismatches)
+ assert_equal(0, m.insertions)
+ assert_equal(1, m.deletions)
+ assert_equal(4, m.length)
+ end
+
+ def test_PatternMatcher_scan_locates_three_patterns_ok
+ p = Seq.new("test", "ataacgagctagctagctagctgactac")
+ assert_equal(3, p.scan("tag").count) # called without a block; count is taken on the returned collection
+ end
+
+ def test_PatternMatcher_scan_with_pos_locates_two_patterns_ok
+ p = Seq.new("test", "ataacgagctagctagctagctgactac")
+ assert_equal(2, p.scan("tag", 10).count) # second argument is the start position
+ end
+end
+++ /dev/null
-#!/usr/bin/env ruby
-$:.unshift File.join(File.dirname(__FILE__),'..','lib')
-
-# Copyright (C) 2007-2010 Martin A. Hansen.
-
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
-# of the License, or (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-# http://www.gnu.org/copyleft/gpl.html
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-# This software is part of the Biopieces framework (www.biopieces.org).
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-require 'maasha/seq'
-require 'test/unit'
-require 'pp'
-
-class TestPatternMatcher < Test::Unit::TestCase
- def setup
- @p = Seq.new("test", "atcg")
- end
-
- def test_PatternMatcher_no_match_returns_nil
- assert_nil(@p.match("gggg"))
- end
-
- def test_PatternMatcher_match_perfect_returns_correctly
- m = @p.match("atcg")
- assert_equal(0, m.pos)
- assert_equal("atcg", m.match)
- assert_equal(4, m.matches)
- assert_equal(0, m.mismatches)
- assert_equal(0, m.insertions)
- assert_equal(0, m.deletions)
- assert_equal(4, m.length)
- end
-
- def test_PatternMatcher_match_perfect_with_ambiguity_codes_returns_correctly
- m = @p.match("nnnn")
- assert_equal(0, m.pos)
- assert_equal("atcg", m.match)
- assert_equal(4, m.matches)
- assert_equal(0, m.mismatches)
- assert_equal(0, m.insertions)
- assert_equal(0, m.deletions)
- assert_equal(4, m.length)
- end
-
- def test_PatternMatcher_match_with_one_mismatch_and_edit_dist_zero_returns_nil
- assert_nil(@p.match("aCcg"))
- end
-
- def test_PatternMatcher_match_with_one_mismatch_and_edit_dist_one_returns_correctly
- m = @p.match("aCcg", pos = 0, edit_distance = 1)
- assert_equal(0, m.pos)
- assert_equal("atcg", m.match)
- assert_equal(3, m.matches)
- assert_equal(1, m.mismatches)
- assert_equal(0, m.insertions)
- assert_equal(0, m.deletions)
- assert_equal(4, m.length)
- end
-
- def test_PatternMatcher_match_with_two_mismatch_and_edit_dist_one_returns_nil
- assert_nil(@p.match("aGcA", pos = 0, edit_distance = 1))
- end
-
- def test_PatternMatcher_match_with_one_insertion_and_edit_dist_zero_returns_nil
- assert_nil(@p.match("atGcg"))
- end
-
- def test_PatternMatcher_match_with_one_insertion_and_edit_dist_one_returns_correctly
- m = @p.match("atGcg", pos = 0, edit_distance = 1)
- assert_equal(0, m.pos)
- assert_equal("atcg", m.match)
- assert_equal(4, m.matches)
- assert_equal(0, m.mismatches)
- assert_equal(1, m.insertions)
- assert_equal(0, m.deletions)
- assert_equal(4, m.length)
- end
-
- def test_PatternMatcher_match_with_two_insertions_and_edit_dist_one_returns_nil
- assert_nil(@p.match("atGcTg", pos = 0, edit_distance = 1))
- end
-
- def test_PatternMatcher_match_with_two_insertions_and_edit_dist_two_returns_correctly
- m = @p.match("atGcTg", pos = 0, edit_distance = 2)
- assert_equal(0, m.pos)
- assert_equal("atcg", m.match)
- assert_equal(4, m.matches)
- assert_equal(0, m.mismatches)
- assert_equal(2, m.insertions)
- assert_equal(0, m.deletions)
- assert_equal(4, m.length)
- end
-
- def test_PatternMatcher_match_with_one_deletion_and_edit_distance_zero_returns_nil
- assert_nil(@p.match("acg"))
- end
-
- def test_PatternMatcher_match_with_one_deletion_and_edit_distance_one_returns_correctly
- m = @p.match("acg", pos = 0, edit_distance = 1)
- assert_equal(0, m.pos)
- assert_equal("atcg", m.match)
- assert_equal(3, m.matches)
- assert_equal(0, m.mismatches)
- assert_equal(0, m.insertions)
- assert_equal(1, m.deletions)
- assert_equal(4, m.length)
- end
-
- def test_PatternMatcher_scan_locates_three_patterns_ok
- p = Seq.new("test", "ataacgagctagctagctagctgactac")
- assert_equal(3, p.scan("tag").count)
- end
-
- def test_PatternMatcher_scan_with_pos_locates_two_patterns_ok
- p = Seq.new("test", "ataacgagctagctagctagctgactac")
- assert_equal(2, p.scan("tag", 10).count)
- end
-end