From b2ea0b5a51a558478af60e9df4c643dd58552086 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Fri, 21 Jun 2013 09:35:03 +0000 Subject: [PATCH] added edit_distance method to seq.rb git-svn-id: http://biopieces.googlecode.com/svn/trunk@2184 74ccb610-7750-0410-82ae-013aeee3265d --- code_ruby/lib/maasha/seq.rb | 18 ++++++++++------ .../test/maasha/{ => seq}/test_levenshtein.rb | 21 ++++++++++++------- code_ruby/test/maasha/test_seq.rb | 9 ++++++-- 3 files changed, 33 insertions(+), 15 deletions(-) rename code_ruby/test/maasha/{ => seq}/test_levenshtein.rb (72%) diff --git a/code_ruby/lib/maasha/seq.rb b/code_ruby/lib/maasha/seq.rb index 2788e5d..e4b6bfd 100644 --- a/code_ruby/lib/maasha/seq.rb +++ b/code_ruby/lib/maasha/seq.rb @@ -27,9 +27,10 @@ require 'maasha/seq/digest' require 'maasha/seq/trim' require 'narray' -autoload :BackTrack, 'maasha/seq/backtrack.rb' -autoload :Dynamic, 'maasha/seq/dynamic.rb' -autoload :Homopolymer, 'maasha/seq/homopolymer.rb' +autoload :BackTrack, 'maasha/seq/backtrack' +autoload :Dynamic, 'maasha/seq/dynamic' +autoload :Homopolymer, 'maasha/seq/homopolymer' +autoload :Levenshtein, 'maasha/seq/levenshtein' # Residue alphabets DNA = %w[a t c g] @@ -68,7 +69,6 @@ TRANS_TAB11 = { "GTG" => "V", "GCG" => "A", "GAG" => "E", "GGG" => "G" } - # Error class for all exceptions to do with Seq. class SeqError < StandardError; end @@ -384,8 +384,14 @@ class Seq # Method to determine the Hamming Distance between # two Sequence objects (case insensitive). - def hamming_distance(seq) - self.seq.upcase.hamming_distance(seq.seq.upcase) + def hamming_distance(entry) + self.seq.upcase.hamming_distance(entry.seq.upcase) + end + + # Method to determine the Edit Distance between + # two Sequence objects (case insensitive). + def edit_distance(entry) + Levenshtein.distance(self.seq, entry.seq) end # Method that generates a random sequence of a given length and type. diff --git a/code_ruby/test/maasha/test_levenshtein.rb b/code_ruby/test/maasha/seq/test_levenshtein.rb similarity index 72% rename from code_ruby/test/maasha/test_levenshtein.rb rename to code_ruby/test/maasha/seq/test_levenshtein.rb index fca9029..a260829 100755 --- a/code_ruby/test/maasha/test_levenshtein.rb +++ b/code_ruby/test/maasha/seq/test_levenshtein.rb @@ -27,27 +27,34 @@ $:.unshift File.join(File.dirname(__FILE__), '..', '..') require 'test/unit' require 'test/helper' -require 'maasha/levenshtein' +require 'maasha/seq/levenshtein' class LevenshteinTest < Test::Unit::TestCase test "Levenshtein.distance returns 0 with identical strings" do - assert_equal(0, Levenshtein.distance("foo", "foo")) + assert_equal(0, Levenshtein.distance("atcg", "atcg")) end test "Levenshtein.distance returns s length if t is empty" do - assert_equal(3, Levenshtein.distance("foo", "")) + assert_equal(4, Levenshtein.distance("atcg", "")) end test "Levenshtein.distance returns t length if s is empty" do - assert_equal(5, Levenshtein.distance("", "fobar")) + assert_equal(5, Levenshtein.distance("", "atcgn")) end test "Levenshtein.distance with mismatch only returns correctly" do - assert_equal(1, Levenshtein.distance("foo", "fox")) + assert_equal(1, Levenshtein.distance("atcg", "atgg")) + end + + test "Levenshtein.distance with ambiguity code returns correctly" do + assert_equal(0, Levenshtein.distance("atcg", "nnnn")) + end + + test "Levenshtein.distance with insertion returns correctly" do + assert_equal(1, Levenshtein.distance("atcg", "attcg")) end test "Levenshtein.distance returns correctly" do - assert_equal(3, Levenshtein.distance("kitten", "sitting")) - assert_equal(3, Levenshtein.distance("Saturday", "Sunday")) + assert_equal(2, Levenshtein.distance("atcg", "tgncg")) end end diff --git a/code_ruby/test/maasha/test_seq.rb b/code_ruby/test/maasha/test_seq.rb index 8eed8d5..5947ee8 100755 --- a/code_ruby/test/maasha/test_seq.rb +++ b/code_ruby/test/maasha/test_seq.rb @@ -298,13 +298,18 @@ class TestSeq < Test::Unit::TestCase assert_equal("UAGCuagc", @entry.complement!.seq) end - - test "#hamming distance returns correctly" do + test "#hamming_distance returns correctly" do seq1 = Seq.new("test1", "ATCG") seq2 = Seq.new("test2", "atgg") assert_equal(1, seq1.hamming_distance(seq2)) end + test "#edit_distance returns correctly" do + seq1 = Seq.new("test1", "ATCG") + seq2 = Seq.new("test2", "tgncg") + assert_equal(2, seq1.edit_distance(seq2)) + end + test "#generate with length < 1 raises" do assert_raise(SeqError) { @entry.generate(-10, :dna) } assert_raise(SeqError) { @entry.generate(0, :dna) } -- 2.39.2