From: martinahansen Date: Fri, 21 Jun 2013 09:35:03 +0000 (+0000) Subject: added edit_distance method to seq.rb X-Git-Url: https://git.donarmstrong.com/?p=biopieces.git;a=commitdiff_plain;h=b2ea0b5a51a558478af60e9df4c643dd58552086 added edit_distance method to seq.rb git-svn-id: http://biopieces.googlecode.com/svn/trunk@2184 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/code_ruby/lib/maasha/seq.rb b/code_ruby/lib/maasha/seq.rb index 2788e5d..e4b6bfd 100644 --- a/code_ruby/lib/maasha/seq.rb +++ b/code_ruby/lib/maasha/seq.rb @@ -27,9 +27,10 @@ require 'maasha/seq/digest' require 'maasha/seq/trim' require 'narray' -autoload :BackTrack, 'maasha/seq/backtrack.rb' -autoload :Dynamic, 'maasha/seq/dynamic.rb' -autoload :Homopolymer, 'maasha/seq/homopolymer.rb' +autoload :BackTrack, 'maasha/seq/backtrack' +autoload :Dynamic, 'maasha/seq/dynamic' +autoload :Homopolymer, 'maasha/seq/homopolymer' +autoload :Levenshtein, 'maasha/seq/levenshtein' # Residue alphabets DNA = %w[a t c g] @@ -68,7 +69,6 @@ TRANS_TAB11 = { "GTG" => "V", "GCG" => "A", "GAG" => "E", "GGG" => "G" } - # Error class for all exceptions to do with Seq. class SeqError < StandardError; end @@ -384,8 +384,14 @@ class Seq # Method to determine the Hamming Distance between # two Sequence objects (case insensitive). - def hamming_distance(seq) - self.seq.upcase.hamming_distance(seq.seq.upcase) + def hamming_distance(entry) + self.seq.upcase.hamming_distance(entry.seq.upcase) + end + + # Method to determine the Edit Distance between + # two Sequence objects (case insensitive). + def edit_distance(entry) + Levenshtein.distance(self.seq, entry.seq) end # Method that generates a random sequence of a given length and type. diff --git a/code_ruby/test/maasha/seq/test_levenshtein.rb b/code_ruby/test/maasha/seq/test_levenshtein.rb new file mode 100755 index 0000000..a260829 --- /dev/null +++ b/code_ruby/test/maasha/seq/test_levenshtein.rb @@ -0,0 +1,60 @@ +#!/usr/bin/env ruby +$:.unshift File.join(File.dirname(__FILE__), '..', '..') + +# Copyright (C) 2013 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# This software is part of the Biopieces framework (www.biopieces.org). + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +require 'test/unit' +require 'test/helper' +require 'maasha/seq/levenshtein' + +class LevenshteinTest < Test::Unit::TestCase + test "Levenshtein.distance returns 0 with identical strings" do + assert_equal(0, Levenshtein.distance("atcg", "atcg")) + end + + test "Levenshtein.distance returns s length if t is empty" do + assert_equal(4, Levenshtein.distance("atcg", "")) + end + + test "Levenshtein.distance returns t length if s is empty" do + assert_equal(5, Levenshtein.distance("", "atcgn")) + end + + test "Levenshtein.distance with mismatch only returns correctly" do + assert_equal(1, Levenshtein.distance("atcg", "atgg")) + end + + test "Levenshtein.distance with ambiguity code returns correctly" do + assert_equal(0, Levenshtein.distance("atcg", "nnnn")) + end + + test "Levenshtein.distance with insertion returns correctly" do + assert_equal(1, Levenshtein.distance("atcg", "attcg")) + end + + test "Levenshtein.distance returns correctly" do + assert_equal(2, Levenshtein.distance("atcg", "tgncg")) + end +end diff --git a/code_ruby/test/maasha/test_levenshtein.rb b/code_ruby/test/maasha/test_levenshtein.rb deleted file mode 100755 index fca9029..0000000 --- a/code_ruby/test/maasha/test_levenshtein.rb +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env ruby -$:.unshift File.join(File.dirname(__FILE__), '..', '..') - -# Copyright (C) 2013 Martin A. Hansen. - -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -# http://www.gnu.org/copyleft/gpl.html - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -# This software is part of the Biopieces framework (www.biopieces.org). - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -require 'test/unit' -require 'test/helper' -require 'maasha/levenshtein' - -class LevenshteinTest < Test::Unit::TestCase - test "Levenshtein.distance returns 0 with identical strings" do - assert_equal(0, Levenshtein.distance("foo", "foo")) - end - - test "Levenshtein.distance returns s length if t is empty" do - assert_equal(3, Levenshtein.distance("foo", "")) - end - - test "Levenshtein.distance returns t length if s is empty" do - assert_equal(5, Levenshtein.distance("", "fobar")) - end - - test "Levenshtein.distance with mismatch only returns correctly" do - assert_equal(1, Levenshtein.distance("foo", "fox")) - end - - test "Levenshtein.distance returns correctly" do - assert_equal(3, Levenshtein.distance("kitten", "sitting")) - assert_equal(3, Levenshtein.distance("Saturday", "Sunday")) - end -end diff --git a/code_ruby/test/maasha/test_seq.rb b/code_ruby/test/maasha/test_seq.rb index 8eed8d5..5947ee8 100755 --- a/code_ruby/test/maasha/test_seq.rb +++ b/code_ruby/test/maasha/test_seq.rb @@ -298,13 +298,18 @@ class TestSeq < Test::Unit::TestCase assert_equal("UAGCuagc", @entry.complement!.seq) end - - test "#hamming distance returns correctly" do + test "#hamming_distance returns correctly" do seq1 = Seq.new("test1", "ATCG") seq2 = Seq.new("test2", "atgg") assert_equal(1, seq1.hamming_distance(seq2)) end + test "#edit_distance returns correctly" do + seq1 = Seq.new("test1", "ATCG") + seq2 = Seq.new("test2", "tgncg") + assert_equal(2, seq1.edit_distance(seq2)) + end + test "#generate with length < 1 raises" do assert_raise(SeqError) { @entry.generate(-10, :dna) } assert_raise(SeqError) { @entry.generate(0, :dna) }