From: martinahansen Date: Thu, 24 May 2012 13:09:11 +0000 (+0000) Subject: moving digest.rb X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=37a8910b7465c8e6aeb2c8211f083f0a40ea2380;p=biopieces.git moving digest.rb git-svn-id: http://biopieces.googlecode.com/svn/trunk@1825 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/code_ruby/lib/maasha/digest.rb b/code_ruby/lib/maasha/digest.rb deleted file mode 100644 index 835e8c0..0000000 --- a/code_ruby/lib/maasha/digest.rb +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright (C) 2007-2011 Martin A. Hansen. - -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -# http://www.gnu.org/copyleft/gpl.html - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -# This software is part of the Biopieces framework (www.biopieces.org). - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -# Error class for all exceptions to do with Digest. -class DigestError < StandardError; end - -module Digest - # Method to get the next digestion product from a sequence. - def each_digest(pattern, cut_pos) - pattern = disambiguate(pattern) - results = [] - offset = 0 - - self.seq.upcase.scan pattern do - pos = $`.length + cut_pos - - if pos >= 0 and pos < self.length - 2 - subseq = self.subseq(offset, pos - offset) - subseq.seq_name = "#{self.seq_name}[#{offset}-#{pos - offset - 1}]" - - block_given? ? (yield subseq) : (results << subseq) - end - - offset = pos - end - - if offset < 0 or offset > self.length - offset = 0 - end - - subseq = self.subseq(offset) - subseq.seq_name = "#{self.seq_name}[#{offset}-#{self.length - 1}]" - - block_given? ? (yield subseq) : (results << subseq) - - return results unless block_given? - end - - private - - # Method that returns a regexp object with a restriction - # enzyme pattern with ambiguity codes substituted to the - # appropriate regexp. - def disambiguate(pattern) - ambiguity = { - 'A' => "A", - 'T' => "T", - 'U' => "T", - 'C' => "C", - 'G' => "G", - 'M' => "[AC]", - 'R' => "[AG]", - 'W' => "[AT]", - 'S' => "[CG]", - 'Y' => "[CT]", - 'K' => "[GT]", - 'V' => "[ACG]", - 'H' => "[ACT]", - 'D' => "[AGT]", - 'B' => "[CGT]", - 'N' => "[GATC]" - } - - new_pattern = "" - - pattern.upcase.each_char do |char| - if ambiguity.has_key? char - new_pattern << ambiguity[char] - else - raise DigestError, "Could not disambiguate residue: #{char}" - end - end - - Regexp.new(new_pattern) - end -end diff --git a/code_ruby/lib/maasha/seq.rb b/code_ruby/lib/maasha/seq.rb index 0dcccba..b45540b 100644 --- a/code_ruby/lib/maasha/seq.rb +++ b/code_ruby/lib/maasha/seq.rb @@ -25,7 +25,7 @@ require 'maasha/patternmatcher' require 'maasha/bits' require 'maasha/backtrack' -require 'maasha/digest' +require 'maasha/seq/digest' require 'maasha/seq/trim' require 'narray' #require 'maasha/patscan' diff --git a/code_ruby/lib/maasha/seq/digest.rb b/code_ruby/lib/maasha/seq/digest.rb new file mode 100644 index 0000000..835e8c0 --- /dev/null +++ b/code_ruby/lib/maasha/seq/digest.rb @@ -0,0 +1,97 @@ +# Copyright (C) 2007-2011 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# This software is part of the Biopieces framework (www.biopieces.org). + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Error class for all exceptions to do with Digest. +class DigestError < StandardError; end + +module Digest + # Method to get the next digestion product from a sequence. + def each_digest(pattern, cut_pos) + pattern = disambiguate(pattern) + results = [] + offset = 0 + + self.seq.upcase.scan pattern do + pos = $`.length + cut_pos + + if pos >= 0 and pos < self.length - 2 + subseq = self.subseq(offset, pos - offset) + subseq.seq_name = "#{self.seq_name}[#{offset}-#{pos - offset - 1}]" + + block_given? ? (yield subseq) : (results << subseq) + end + + offset = pos + end + + if offset < 0 or offset > self.length + offset = 0 + end + + subseq = self.subseq(offset) + subseq.seq_name = "#{self.seq_name}[#{offset}-#{self.length - 1}]" + + block_given? ? (yield subseq) : (results << subseq) + + return results unless block_given? + end + + private + + # Method that returns a regexp object with a restriction + # enzyme pattern with ambiguity codes substituted to the + # appropriate regexp. + def disambiguate(pattern) + ambiguity = { + 'A' => "A", + 'T' => "T", + 'U' => "T", + 'C' => "C", + 'G' => "G", + 'M' => "[AC]", + 'R' => "[AG]", + 'W' => "[AT]", + 'S' => "[CG]", + 'Y' => "[CT]", + 'K' => "[GT]", + 'V' => "[ACG]", + 'H' => "[ACT]", + 'D' => "[AGT]", + 'B' => "[CGT]", + 'N' => "[GATC]" + } + + new_pattern = "" + + pattern.upcase.each_char do |char| + if ambiguity.has_key? char + new_pattern << ambiguity[char] + else + raise DigestError, "Could not disambiguate residue: #{char}" + end + end + + Regexp.new(new_pattern) + end +end diff --git a/code_ruby/lib/maasha/seq/trim.rb b/code_ruby/lib/maasha/seq/trim.rb index a322dc9..ad8c693 100644 --- a/code_ruby/lib/maasha/seq/trim.rb +++ b/code_ruby/lib/maasha/seq/trim.rb @@ -23,6 +23,7 @@ require 'inline' +# Error class for all exceptions to do with Trim. class TrimError < StandardError; end # Module containing methods for end trimming sequences with suboptimal quality diff --git a/code_ruby/test/maasha/seq/test_digest.rb b/code_ruby/test/maasha/seq/test_digest.rb new file mode 100755 index 0000000..979e081 --- /dev/null +++ b/code_ruby/test/maasha/seq/test_digest.rb @@ -0,0 +1,59 @@ +#!/usr/bin/env ruby +$:.unshift File.join(File.dirname(__FILE__),'..','lib') + +# Copyright (C) 2007-2011 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# This software is part of the Biopieces framework (www.biopieces.org). + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +require 'maasha/seq' +require 'test/unit' +require 'pp' + +class TestDigest < Test::Unit::TestCase + def test_Digest_each_digest_raises_on_bad_pattern_residue + entry = Seq.new + assert_raise(DigestError) { entry.each_digest("X", 4) } + end + + def test_Digest_each_digest_dont_raise_on_ok_pattern_residue + entry = Seq.new("test", "atcg") + assert_nothing_raised { entry.each_digest("AGCUTRYWSMKHDVBNagcutrywsmkhdvbn", 4) } + end + + def test_Digest_each_digest_with_no_match_returns_correctly + entry = Seq.new("test", "aaaaaaaaaaa") + assert_equal(entry.seq, entry.each_digest("CGCG", 1).first.seq) + end + + def test_Digest_each_digest_with_matches_returns_correctly + entry = Seq.new("test", "TTTTaaaaTTTTbbbbTTTT") + seqs = entry.each_digest("TTNT", 1) + assert_equal("T", seqs[0].seq) + assert_equal("TTTaaaaT", seqs[1].seq) + assert_equal("TTTbbbbT", seqs[2].seq) + assert_equal("TTT", seqs[3].seq) + end +end + + +__END__ diff --git a/code_ruby/test/maasha/test_digest.rb b/code_ruby/test/maasha/test_digest.rb deleted file mode 100755 index 979e081..0000000 --- a/code_ruby/test/maasha/test_digest.rb +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env ruby -$:.unshift File.join(File.dirname(__FILE__),'..','lib') - -# Copyright (C) 2007-2011 Martin A. Hansen. - -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -# http://www.gnu.org/copyleft/gpl.html - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -# This software is part of the Biopieces framework (www.biopieces.org). - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -require 'maasha/seq' -require 'test/unit' -require 'pp' - -class TestDigest < Test::Unit::TestCase - def test_Digest_each_digest_raises_on_bad_pattern_residue - entry = Seq.new - assert_raise(DigestError) { entry.each_digest("X", 4) } - end - - def test_Digest_each_digest_dont_raise_on_ok_pattern_residue - entry = Seq.new("test", "atcg") - assert_nothing_raised { entry.each_digest("AGCUTRYWSMKHDVBNagcutrywsmkhdvbn", 4) } - end - - def test_Digest_each_digest_with_no_match_returns_correctly - entry = Seq.new("test", "aaaaaaaaaaa") - assert_equal(entry.seq, entry.each_digest("CGCG", 1).first.seq) - end - - def test_Digest_each_digest_with_matches_returns_correctly - entry = Seq.new("test", "TTTTaaaaTTTTbbbbTTTT") - seqs = entry.each_digest("TTNT", 1) - assert_equal("T", seqs[0].seq) - assert_equal("TTTaaaaT", seqs[1].seq) - assert_equal("TTTbbbbT", seqs[2].seq) - assert_equal("TTT", seqs[3].seq) - end -end - - -__END__