From: martinahansen Date: Thu, 24 May 2012 13:30:02 +0000 (+0000) Subject: moved patternmatcher.rb X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=44ce73c7a2c51657b223604657cea09bdf007adc;p=biopieces.git moved patternmatcher.rb git-svn-id: http://biopieces.googlecode.com/svn/trunk@1826 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/code_ruby/lib/maasha/patternmatcher.rb b/code_ruby/lib/maasha/patternmatcher.rb deleted file mode 100644 index a07fd3a..0000000 --- a/code_ruby/lib/maasha/patternmatcher.rb +++ /dev/null @@ -1,264 +0,0 @@ -# Copyright (C) 2007-2011 Martin A. Hansen. - -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -# http://www.gnu.org/copyleft/gpl.html - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -# This software is part of the Biopieces framework (www.biopieces.org). - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -# IUPAC nucleotide pair ambiguity equivalents are saved in an -# array of bit fields. 
- -BIT_A = 1 << 0 -BIT_T = 1 << 1 -BIT_C = 1 << 2 -BIT_G = 1 << 3 - -EQUAL = Array.new(256, 0) -EQUAL['A'.ord] = BIT_A -EQUAL['a'.ord] = BIT_A -EQUAL['T'.ord] = BIT_T -EQUAL['t'.ord] = BIT_T -EQUAL['U'.ord] = BIT_T -EQUAL['u'.ord] = BIT_T -EQUAL['C'.ord] = BIT_C -EQUAL['c'.ord] = BIT_C -EQUAL['G'.ord] = BIT_G -EQUAL['g'.ord] = BIT_G -EQUAL['M'.ord] = (BIT_A|BIT_C) -EQUAL['m'.ord] = (BIT_A|BIT_C) -EQUAL['R'.ord] = (BIT_A|BIT_G) -EQUAL['r'.ord] = (BIT_A|BIT_G) -EQUAL['W'.ord] = (BIT_A|BIT_T) -EQUAL['w'.ord] = (BIT_A|BIT_T) -EQUAL['S'.ord] = (BIT_C|BIT_G) -EQUAL['s'.ord] = (BIT_C|BIT_G) -EQUAL['Y'.ord] = (BIT_C|BIT_T) -EQUAL['y'.ord] = (BIT_C|BIT_T) -EQUAL['K'.ord] = (BIT_G|BIT_T) -EQUAL['k'.ord] = (BIT_G|BIT_T) -EQUAL['B'.ord] = (BIT_C|BIT_G|BIT_T) -EQUAL['b'.ord] = (BIT_C|BIT_G|BIT_T) -EQUAL['D'.ord] = (BIT_A|BIT_G|BIT_T) -EQUAL['d'.ord] = (BIT_A|BIT_G|BIT_T) -EQUAL['H'.ord] = (BIT_A|BIT_C|BIT_T) -EQUAL['h'.ord] = (BIT_A|BIT_C|BIT_T) -EQUAL['V'.ord] = (BIT_A|BIT_C|BIT_G) -EQUAL['v'.ord] = (BIT_A|BIT_C|BIT_G) -EQUAL['N'.ord] = (BIT_A|BIT_C|BIT_G|BIT_T) -EQUAL['n'.ord] = (BIT_A|BIT_C|BIT_G|BIT_T) - -# Module containing code to locate nucleotide patterns in sequences allowing for -# ambiguity codes and a given maximum edit distance. -# Insertions are nucleotides found in the pattern but not in the sequence. -# Deletions are nucleotides found in the sequence but not in the pattern. -# -# Inspired by the paper by Bruno Woltzenlogel Paleo (page 197): -# http://www.logic.at/people/bruno/Papers/2007-GATE-ESSLLI.pdf -module PatternMatcher - # ------------------------------------------------------------------------------ - # str.match(pattern[, pos[, max_edit_distance]]) - # -> Match or nil - # - # ------------------------------------------------------------------------------ - # Method to locate the next pattern match starting from a given position. A match - # is allowed to contain a given maximum edit distance. 
If a match is located a - # Match object will be returned otherwise nil. - def match(pattern, pos = 0, max_edit_distance = 0) - vector = Vector.new(@seq, pattern, max_edit_distance) - - while pos < @seq.length - vector.update(pos) - - return vector.to_match(pos) if vector.match_found? - - pos += 1 - end - - nil # no match - end - - # ------------------------------------------------------------------------------ - # str.scan(pattern[, pos[, max_edit_distance]]) - # -> Array - # str.scan(pattern[, pos[, max_edit_distance]]) { |match| - # block - # } - # -> Match - # - # ------------------------------------------------------------------------------ - # Method to iterate through a sequence to locate pattern matches starting - # from a given position and allowing for a maximum edit distance. - # Matches found in block context return the Match object. Otherwise matches are - # returned in an Array. - def scan(pattern, pos = 0, max_edit_distance = 0) - matches = [] - - while match = match(pattern, pos, max_edit_distance) - if block_given? - yield match - else - matches << match - end - - pos = match.pos + 1 - end - - return matches unless block_given? - end -end - -# Class containing the score vector used for locating matches. -class Vector - # Method to initailize the score vector. - def initialize(seq, pattern, max_edit_distance) - @seq = seq - @pattern = pattern - @max_edit_distance = max_edit_distance - @vector = [] - - (0 ... @pattern.length + 1).each do |i| - @vector[i] = Score.new(matches = 0, mismatches = 0, insertions = i, deletions = 0, edit_distance = i) - end - end - - # Method to update the score vector. - def update(pos) - score_diag = @vector[0] - score_up = Score.new # insertion - score_left = @vector[1] # deletion - - (0 ... 
@pattern.length).each do |i| - if match?(@seq[pos], @pattern[i]) - new_score = score_diag.dup - new_score.matches += 1 - else - if deletion?(score_diag, score_up, score_left) - new_score = score_left.dup - new_score.deletions += 1 - elsif mismatch?(score_diag, score_up, score_left) - new_score = score_diag.dup - new_score.mismatches += 1 - elsif insertion?(score_diag, score_up, score_left) - new_score = score_up.dup - new_score.insertions += 1 - end - - new_score.edit_distance += 1 - end - - score_diag = @vector[i + 1] - score_up = new_score - score_left = @vector[i + 2] - - @vector[i + 1] = new_score - end - end - - # Method that determines if a match was found by analyzing the score vector. - def match_found? - if @vector.last.edit_distance <= @max_edit_distance - true - end - end - - # Method that returns a Match object initialized with - # information from the score vector. - def to_match(pos) - matches = @vector.last.matches - mismatches = @vector.last.mismatches - insertions = @vector.last.insertions - deletions = @vector.last.deletions - length = @pattern.length - insertions + deletions - offset = pos - length + 1 - match = @seq[offset ... offset + length] - - Match.new(offset, match, matches, mismatches, insertions, deletions, length) - end - - # Method to convert the score vector to a string. - def to_s - "(m,m,i,d,e)\n" + @vector.join("\n") + "\n\n" - end - - private - - # Method to determine if a match occurred. - def match?(char1, char2) - (EQUAL[char1.ord] & EQUAL[char2.ord]) != 0 - end - - # Method to determine if a mismatch occured. - def mismatch?(score_diag, score_up, score_left) - if score_diag.edit_distance <= score_up.edit_distance and - score_diag.edit_distance <= score_left.edit_distance - true - end - end - - # Method to determine if an insertion occured. 
- def insertion?(score_diag, score_up, score_left) - if score_up.edit_distance <= score_diag.edit_distance and - score_up.edit_distance <= score_left.edit_distance - true - end - end - - # Method to determine if a deletion occured. - def deletion?(score_diag, score_up, score_left) - if score_left.edit_distance <= score_diag.edit_distance and - score_left.edit_distance <= score_up.edit_distance - true - end - end -end - -# Class to instantiate Score objects that holds score information. -class Score - attr_accessor :matches, :mismatches, :insertions, :deletions, :edit_distance - - def initialize(matches = 0, mismatches = 0, insertions = 0, deletions = 0, edit_distance = 0) - @matches = matches - @mismatches = mismatches - @insertions = insertions - @deletions = deletions - @edit_distance = edit_distance - end - - def to_s - "(#{[self.matches, self.mismatches, self.insertions, self.deletions, self.edit_distance].join(',')})" - end -end - -# Class for creating Match objects which contain the description of a -# match between a nucleotide sequence and a pattern. 
-class Match - attr_reader :pos, :match, :matches, :mismatches, :insertions, :deletions, :edit_distance, :length - - def initialize(pos, match, matches, mismatches, insertions, deletions, length) - @pos = pos - @match = match - @matches = matches - @mismatches = mismatches - @insertions = insertions - @deletions = deletions - @edit_distance = mismatches + insertions + deletions - @length = length - end -end diff --git a/code_ruby/lib/maasha/seq.rb b/code_ruby/lib/maasha/seq.rb index b45540b..3c1c156 100644 --- a/code_ruby/lib/maasha/seq.rb +++ b/code_ruby/lib/maasha/seq.rb @@ -22,10 +22,10 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -require 'maasha/patternmatcher' require 'maasha/bits' require 'maasha/backtrack' require 'maasha/seq/digest' +require 'maasha/seq/patternmatcher' require 'maasha/seq/trim' require 'narray' #require 'maasha/patscan' diff --git a/code_ruby/lib/maasha/seq/patternmatcher.rb b/code_ruby/lib/maasha/seq/patternmatcher.rb new file mode 100644 index 0000000..a07fd3a --- /dev/null +++ b/code_ruby/lib/maasha/seq/patternmatcher.rb @@ -0,0 +1,264 @@ +# Copyright (C) 2007-2011 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
# http://www.gnu.org/copyleft/gpl.html

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

# This software is part of the Biopieces framework (www.biopieces.org).

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

# IUPAC nucleotide pair ambiguity equivalents are saved in an
# array of bit fields.

BIT_A = 1 << 0
BIT_T = 1 << 1
BIT_C = 1 << 2
BIT_G = 1 << 3

# Lookup table indexed by character code (0..255). Each entry holds the BIT_*
# flags of the nucleotides an IUPAC code stands for; every other character
# maps to 0. Upper and lower case share the same flags and U is treated as T.
EQUAL = Array.new(256, 0)

{
  'A' => BIT_A,
  'T' => BIT_T,
  'U' => BIT_T,
  'C' => BIT_C,
  'G' => BIT_G,
  'M' => BIT_A | BIT_C,
  'R' => BIT_A | BIT_G,
  'W' => BIT_A | BIT_T,
  'S' => BIT_C | BIT_G,
  'Y' => BIT_C | BIT_T,
  'K' => BIT_G | BIT_T,
  'B' => BIT_C | BIT_G | BIT_T,
  'D' => BIT_A | BIT_G | BIT_T,
  'H' => BIT_A | BIT_C | BIT_T,
  'V' => BIT_A | BIT_C | BIT_G,
  'N' => BIT_A | BIT_C | BIT_G | BIT_T
}.each do |code, bits|
  EQUAL[code.ord]          = bits
  EQUAL[code.downcase.ord] = bits
end

# Module containing code to locate nucleotide patterns in sequences allowing for
# ambiguity codes and a given maximum edit distance.
# Insertions are nucleotides found in the pattern but not in the sequence.
# Deletions are nucleotides found in the sequence but not in the pattern.
#
# The methods read the sequence to search from the @seq instance variable of
# the object they are mixed into.
#
# Inspired by the paper by Bruno Woltzenlogel Paleo (page 197):
# http://www.logic.at/people/bruno/Papers/2007-GATE-ESSLLI.pdf
module PatternMatcher
  # ------------------------------------------------------------------------------
  #   str.match(pattern[, pos[, max_edit_distance]])
  #   -> Match or nil
  #
  # ------------------------------------------------------------------------------
  # Method to locate the next pattern match starting from a given position. A match
  # is allowed to contain a given maximum edit distance. If a match is located a
  # Match object will be returned otherwise nil.
  def match(pattern, pos = 0, max_edit_distance = 0)
    vector = Vector.new(@seq, pattern, max_edit_distance)

    # Feed the sequence to the score vector one residue at a time and stop at
    # the first position where the whole pattern is within the allowed distance.
    (pos...@seq.length).each do |i|
      vector.update(i)

      return vector.to_match(i) if vector.match_found?
    end

    nil # no match
  end

  # ------------------------------------------------------------------------------
  #   str.scan(pattern[, pos[, max_edit_distance]])
  #   -> Array
  #   str.scan(pattern[, pos[, max_edit_distance]]) { |match|
  #     block
  #   }
  #   -> nil
  #
  # ------------------------------------------------------------------------------
  # Method to iterate through a sequence to locate pattern matches starting
  # from a given position and allowing for a maximum edit distance.
  # In block context each Match object is yielded and nil is returned.
  # Otherwise the matches are returned in an Array.
  def scan(pattern, pos = 0, max_edit_distance = 0)
    matches = []

    while (hit = match(pattern, pos, max_edit_distance))
      if block_given?
        yield hit
      else
        matches << hit
      end

      # Resume one residue past the start of the hit just reported.
      pos = hit.pos + 1
    end

    matches unless block_given?
  end
end

# Class containing the score vector used for locating matches.
# Entry i of the vector holds the Score of aligning the first i pattern
# symbols so that the alignment ends at the most recently processed
# sequence position.
class Vector
  # Method to initialize the score vector.
  # Before any residue has been read, reaching pattern prefix i costs
  # i insertions.
  def initialize(seq, pattern, max_edit_distance)
    @seq               = seq
    @pattern           = pattern
    @max_edit_distance = max_edit_distance
    @vector            = Array.new(@pattern.length + 1) { |i| Score.new(0, 0, i, 0, i) }
  end

  # Method to update the score vector with the residue at sequence
  # position pos.
  def update(pos)
    residue  = @seq[pos]
    previous = @vector
    current  = [previous.first] # entry 0 never changes: a match may start anywhere for free

    @pattern.length.times do |i|
      # previous[i]     : diagonal (match or mismatch)
      # current[i]      : up       (insertion)
      # previous[i + 1] : left     (deletion)
      current << next_score(residue, @pattern[i], previous[i], current[i], previous[i + 1])
    end

    @vector = current
  end

  # Method that determines if a match was found by analyzing the score vector.
  # Returns true or false.
  def match_found?
    @vector.last.edit_distance <= @max_edit_distance
  end

  # Method that returns a Match object initialized with
  # information from the score vector. pos is the sequence position
  # the match ends at.
  def to_match(pos)
    final      = @vector.last
    insertions = final.insertions
    deletions  = final.deletions
    length     = @pattern.length - insertions + deletions # sequence residues covered
    offset     = pos - length + 1
    match      = @seq[offset...offset + length]

    Match.new(offset, match, final.matches, final.mismatches, insertions, deletions, length)
  end

  # Method to convert the score vector to a string.
  def to_s
    "(m,m,i,d,e)\n" + @vector.join("\n") + "\n\n"
  end

  private

  # Method to compute the Score of one cell from its three neighbours.
  # On equal edit distance a deletion is preferred over a mismatch, and a
  # mismatch over an insertion.
  def next_score(seq_char, pattern_char, score_diag, score_up, score_left)
    if match?(seq_char, pattern_char)
      score = score_diag.dup
      score.matches += 1
      return score
    end

    if deletion?(score_diag, score_up, score_left)
      score = score_left.dup
      score.deletions += 1
    elsif mismatch?(score_diag, score_up, score_left)
      score = score_diag.dup
      score.mismatches += 1
    else
      # Neither left nor diagonal is minimal, so up is: an insertion.
      score = score_up.dup
      score.insertions += 1
    end

    score.edit_distance += 1
    score
  end

  # Method to determine if a match occurred.
  def match?(char1, char2)
    (equal_bits(char1) & equal_bits(char2)) != 0
  end

  # Method to look up the nucleotide bit field of a character. Characters
  # outside the EQUAL table get 0 so they never match anything; a plain
  # EQUAL[ord] lookup would give nil for them.
  def equal_bits(char)
    EQUAL.fetch(char.ord, 0)
  end

  # Method to determine if a mismatch occurred.
  def mismatch?(score_diag, score_up, score_left)
    score_diag.edit_distance <= score_up.edit_distance &&
      score_diag.edit_distance <= score_left.edit_distance
  end

  # Method to determine if a deletion occurred.
  def deletion?(score_diag, score_up, score_left)
    score_left.edit_distance <= score_diag.edit_distance &&
      score_left.edit_distance <= score_up.edit_distance
  end
end

# Class to instantiate Score objects that holds score information.
class Score
  attr_accessor :matches, :mismatches, :insertions, :deletions, :edit_distance

  def initialize(matches = 0, mismatches = 0, insertions = 0, deletions = 0, edit_distance = 0)
    @matches       = matches
    @mismatches    = mismatches
    @insertions    = insertions
    @deletions     = deletions
    @edit_distance = edit_distance
  end

  # Method to convert the score to a string in the order
  # matches, mismatches, insertions, deletions, edit distance.
  def to_s
    "(#{[matches, mismatches, insertions, deletions, edit_distance].join(',')})"
  end
end

# Class for creating Match objects which contain the description of a
# match between a nucleotide sequence and a pattern.
+class Match + attr_reader :pos, :match, :matches, :mismatches, :insertions, :deletions, :edit_distance, :length + + def initialize(pos, match, matches, mismatches, insertions, deletions, length) + @pos = pos + @match = match + @matches = matches + @mismatches = mismatches + @insertions = insertions + @deletions = deletions + @edit_distance = mismatches + insertions + deletions + @length = length + end +end diff --git a/code_ruby/test/maasha/seq/test_patternmatcher.rb b/code_ruby/test/maasha/seq/test_patternmatcher.rb new file mode 100755 index 0000000..9a7a50e --- /dev/null +++ b/code_ruby/test/maasha/seq/test_patternmatcher.rb @@ -0,0 +1,136 @@ +#!/usr/bin/env ruby +$:.unshift File.join(File.dirname(__FILE__),'..','lib') + +# Copyright (C) 2007-2010 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# This software is part of the Biopieces framework (www.biopieces.org). 
+ +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +require 'maasha/seq' +require 'test/unit' +require 'pp' + +class TestPatternMatcher < Test::Unit::TestCase + def setup + @p = Seq.new("test", "atcg") + end + + def test_PatternMatcher_no_match_returns_nil + assert_nil(@p.match("gggg")) + end + + def test_PatternMatcher_match_perfect_returns_correctly + m = @p.match("atcg") + assert_equal(0, m.pos) + assert_equal("atcg", m.match) + assert_equal(4, m.matches) + assert_equal(0, m.mismatches) + assert_equal(0, m.insertions) + assert_equal(0, m.deletions) + assert_equal(4, m.length) + end + + def test_PatternMatcher_match_perfect_with_ambiguity_codes_returns_correctly + m = @p.match("nnnn") + assert_equal(0, m.pos) + assert_equal("atcg", m.match) + assert_equal(4, m.matches) + assert_equal(0, m.mismatches) + assert_equal(0, m.insertions) + assert_equal(0, m.deletions) + assert_equal(4, m.length) + end + + def test_PatternMatcher_match_with_one_mismatch_and_edit_dist_zero_returns_nil + assert_nil(@p.match("aCcg")) + end + + def test_PatternMatcher_match_with_one_mismatch_and_edit_dist_one_returns_correctly + m = @p.match("aCcg", pos = 0, edit_distance = 1) + assert_equal(0, m.pos) + assert_equal("atcg", m.match) + assert_equal(3, m.matches) + assert_equal(1, m.mismatches) + assert_equal(0, m.insertions) + assert_equal(0, m.deletions) + assert_equal(4, m.length) + end + + def test_PatternMatcher_match_with_two_mismatch_and_edit_dist_one_returns_nil + assert_nil(@p.match("aGcA", pos = 0, edit_distance = 1)) + end + + def test_PatternMatcher_match_with_one_insertion_and_edit_dist_zero_returns_nil + assert_nil(@p.match("atGcg")) + end + + def test_PatternMatcher_match_with_one_insertion_and_edit_dist_one_returns_correctly + m = @p.match("atGcg", pos = 0, edit_distance = 1) + assert_equal(0, m.pos) + assert_equal("atcg", m.match) + assert_equal(4, m.matches) + assert_equal(0, m.mismatches) + assert_equal(1, m.insertions) + 
assert_equal(0, m.deletions) + assert_equal(4, m.length) + end + + def test_PatternMatcher_match_with_two_insertions_and_edit_dist_one_returns_nil + assert_nil(@p.match("atGcTg", pos = 0, edit_distance = 1)) + end + + def test_PatternMatcher_match_with_two_insertions_and_edit_dist_two_returns_correctly + m = @p.match("atGcTg", pos = 0, edit_distance = 2) + assert_equal(0, m.pos) + assert_equal("atcg", m.match) + assert_equal(4, m.matches) + assert_equal(0, m.mismatches) + assert_equal(2, m.insertions) + assert_equal(0, m.deletions) + assert_equal(4, m.length) + end + + def test_PatternMatcher_match_with_one_deletion_and_edit_distance_zero_returns_nil + assert_nil(@p.match("acg")) + end + + def test_PatternMatcher_match_with_one_deletion_and_edit_distance_one_returns_correctly + m = @p.match("acg", pos = 0, edit_distance = 1) + assert_equal(0, m.pos) + assert_equal("atcg", m.match) + assert_equal(3, m.matches) + assert_equal(0, m.mismatches) + assert_equal(0, m.insertions) + assert_equal(1, m.deletions) + assert_equal(4, m.length) + end + + def test_PatternMatcher_scan_locates_three_patterns_ok + p = Seq.new("test", "ataacgagctagctagctagctgactac") + assert_equal(3, p.scan("tag").count) + end + + def test_PatternMatcher_scan_with_pos_locates_two_patterns_ok + p = Seq.new("test", "ataacgagctagctagctagctgactac") + assert_equal(2, p.scan("tag", 10).count) + end +end diff --git a/code_ruby/test/maasha/test_patternmatcher.rb b/code_ruby/test/maasha/test_patternmatcher.rb deleted file mode 100755 index 9a7a50e..0000000 --- a/code_ruby/test/maasha/test_patternmatcher.rb +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env ruby -$:.unshift File.join(File.dirname(__FILE__),'..','lib') - -# Copyright (C) 2007-2010 Martin A. Hansen. - -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. 
- -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -# http://www.gnu.org/copyleft/gpl.html - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -# This software is part of the Biopieces framework (www.biopieces.org). - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -require 'maasha/seq' -require 'test/unit' -require 'pp' - -class TestPatternMatcher < Test::Unit::TestCase - def setup - @p = Seq.new("test", "atcg") - end - - def test_PatternMatcher_no_match_returns_nil - assert_nil(@p.match("gggg")) - end - - def test_PatternMatcher_match_perfect_returns_correctly - m = @p.match("atcg") - assert_equal(0, m.pos) - assert_equal("atcg", m.match) - assert_equal(4, m.matches) - assert_equal(0, m.mismatches) - assert_equal(0, m.insertions) - assert_equal(0, m.deletions) - assert_equal(4, m.length) - end - - def test_PatternMatcher_match_perfect_with_ambiguity_codes_returns_correctly - m = @p.match("nnnn") - assert_equal(0, m.pos) - assert_equal("atcg", m.match) - assert_equal(4, m.matches) - assert_equal(0, m.mismatches) - assert_equal(0, m.insertions) - assert_equal(0, m.deletions) - assert_equal(4, m.length) - end - - def test_PatternMatcher_match_with_one_mismatch_and_edit_dist_zero_returns_nil - assert_nil(@p.match("aCcg")) - end - - def test_PatternMatcher_match_with_one_mismatch_and_edit_dist_one_returns_correctly - m = @p.match("aCcg", pos = 0, edit_distance = 1) - assert_equal(0, m.pos) - assert_equal("atcg", m.match) - assert_equal(3, m.matches) - assert_equal(1, 
m.mismatches) - assert_equal(0, m.insertions) - assert_equal(0, m.deletions) - assert_equal(4, m.length) - end - - def test_PatternMatcher_match_with_two_mismatch_and_edit_dist_one_returns_nil - assert_nil(@p.match("aGcA", pos = 0, edit_distance = 1)) - end - - def test_PatternMatcher_match_with_one_insertion_and_edit_dist_zero_returns_nil - assert_nil(@p.match("atGcg")) - end - - def test_PatternMatcher_match_with_one_insertion_and_edit_dist_one_returns_correctly - m = @p.match("atGcg", pos = 0, edit_distance = 1) - assert_equal(0, m.pos) - assert_equal("atcg", m.match) - assert_equal(4, m.matches) - assert_equal(0, m.mismatches) - assert_equal(1, m.insertions) - assert_equal(0, m.deletions) - assert_equal(4, m.length) - end - - def test_PatternMatcher_match_with_two_insertions_and_edit_dist_one_returns_nil - assert_nil(@p.match("atGcTg", pos = 0, edit_distance = 1)) - end - - def test_PatternMatcher_match_with_two_insertions_and_edit_dist_two_returns_correctly - m = @p.match("atGcTg", pos = 0, edit_distance = 2) - assert_equal(0, m.pos) - assert_equal("atcg", m.match) - assert_equal(4, m.matches) - assert_equal(0, m.mismatches) - assert_equal(2, m.insertions) - assert_equal(0, m.deletions) - assert_equal(4, m.length) - end - - def test_PatternMatcher_match_with_one_deletion_and_edit_distance_zero_returns_nil - assert_nil(@p.match("acg")) - end - - def test_PatternMatcher_match_with_one_deletion_and_edit_distance_one_returns_correctly - m = @p.match("acg", pos = 0, edit_distance = 1) - assert_equal(0, m.pos) - assert_equal("atcg", m.match) - assert_equal(3, m.matches) - assert_equal(0, m.mismatches) - assert_equal(0, m.insertions) - assert_equal(1, m.deletions) - assert_equal(4, m.length) - end - - def test_PatternMatcher_scan_locates_three_patterns_ok - p = Seq.new("test", "ataacgagctagctagctagctgactac") - assert_equal(3, p.scan("tag").count) - end - - def test_PatternMatcher_scan_with_pos_locates_two_patterns_ok - p = Seq.new("test", 
"ataacgagctagctagctagctgactac") - assert_equal(2, p.scan("tag", 10).count) - end -end