From: martinahansen Date: Tue, 15 Nov 2011 19:40:12 +0000 (+0000) Subject: fixed digest.rb X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;ds=inline;h=2acc777adfd5196ef5a2afd607a42023ea33b461;p=biopieces.git fixed digest.rb git-svn-id: http://biopieces.googlecode.com/svn/trunk@1649 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_bin/digest_seq b/bp_bin/digest_seq index e3a7c4f..b445134 100755 --- a/bp_bin/digest_seq +++ b/bp_bin/digest_seq @@ -30,7 +30,6 @@ require 'maasha/biopieces' require 'maasha/seq' -require 'maasha/digest' casts = [] casts << {:long=>'pattern', :short=>'p', :type=>'string', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil} @@ -41,11 +40,10 @@ options = Biopieces.options_parse(ARGV, casts) Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| input.each_record do |record| if record.has_key? :SEQ_NAME and record.has_key? :SEQ - seq = Seq.new_bp(record) - digest = Digest.new(seq, options[:pattern].to_s, options[:cut_pos]) + seq = Seq.new_bp(record) - digest.each do |subseq| - new_record = subseq.to_bp + seq.each_digest(options[:pattern].to_s, options[:cut_pos]) do |digest| + new_record = digest.to_bp if new_record[:SEQ_NAME] =~ /\[(\d+)-(\d+)\]$/ s_beg = $1 @@ -55,6 +53,7 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| new_record[:S_BEG] = s_beg new_record[:S_END] = s_end new_record[:REC_TYPE] = "DIGEST" + output.puts new_record end else diff --git a/code_ruby/lib/maasha/digest.rb b/code_ruby/lib/maasha/digest.rb index 727e51c..835e8c0 100644 --- a/code_ruby/lib/maasha/digest.rb +++ b/code_ruby/lib/maasha/digest.rb @@ -25,45 +25,36 @@ # Error class for all exceptions to do with Digest. class DigestError < StandardError; end -class Digest - include Enumerable - - # Initialize a digest object with the following arguments: - # - seq: A sequence object. - # - pattern: A restriction enzyme recognition pattern. - # - cut_pos: Offset from match position where the enzyme cuts. - def initialize(seq, pattern, cut_pos) - @seq = seq - @pattern = disambiguate(pattern) - @cut_pos = cut_pos - @offset = 0 - end - +module Digest # Method to get the next digestion product from a sequence. - def each - @seq.seq.upcase.scan @pattern do - pos = $`.length + @cut_pos - 1 - - if pos >= 0 and pos < @seq.length - 2 - seq = Seq.new("#{@seq.seq_name}[#{@offset}-#{pos}]", @seq.seq[@offset .. pos], @seq.type) - - yield seq + def each_digest(pattern, cut_pos) + pattern = disambiguate(pattern) + results = [] + offset = 0 + + self.seq.upcase.scan pattern do + pos = $`.length + cut_pos + + if pos >= 0 and pos < self.length - 2 + subseq = self.subseq(offset, pos - offset) + subseq.seq_name = "#{self.seq_name}[#{offset}-#{pos - offset - 1}]" + + block_given? ? (yield subseq) : (results << subseq) end - @offset = pos + 1 + offset = pos end - if @offset < 0 - @offset = 0 - elsif @offset > @seq.length - @offset = 0 + if offset < 0 or offset > self.length + offset = 0 end - seq = Seq.new("#{@seq.seq_name}[#{@offset}-#{@seq.length - 1}]", @seq.seq[@offset .. @seq.length], @seq.type) + subseq = self.subseq(offset) + subseq.seq_name = "#{self.seq_name}[#{offset}-#{self.length - 1}]" - yield seq + block_given? ? (yield subseq) : (results << subseq) - self # conventionally + return results unless block_given? end private diff --git a/code_ruby/lib/maasha/seq.rb b/code_ruby/lib/maasha/seq.rb index 955f8c8..598d0a2 100644 --- a/code_ruby/lib/maasha/seq.rb +++ b/code_ruby/lib/maasha/seq.rb @@ -25,6 +25,7 @@ require 'maasha/patternmatcher' require 'maasha/bits' require 'maasha/backtrack' +require 'maasha/digest' #require 'maasha/patscan' # Residue alphabets @@ -46,6 +47,7 @@ class Seq #include Patscan include PatternMatcher include BackTrack + include Digest attr_accessor :seq_name, :seq, :type, :qual diff --git a/code_ruby/test/maasha/test_digest.rb b/code_ruby/test/maasha/test_digest.rb index 0916c1f..979e081 100755 --- a/code_ruby/test/maasha/test_digest.rb +++ b/code_ruby/test/maasha/test_digest.rb @@ -1,7 +1,7 @@ #!/usr/bin/env ruby $:.unshift File.join(File.dirname(__FILE__),'..','lib') -# Copyright (C) 2007-2010 Martin A. Hansen. +# Copyright (C) 2007-2011 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -26,27 +26,32 @@ $:.unshift File.join(File.dirname(__FILE__),'..','lib') # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< require 'maasha/seq' -require 'maasha/digest' require 'test/unit' require 'pp' class TestDigest < Test::Unit::TestCase - def setup - @entry = Seq.new + def test_Digest_each_digest_raises_on_bad_pattern_residue + entry = Seq.new + assert_raise(DigestError) { entry.each_digest("X", 4) } end - def test_Digest_new_raises_on_bad_pattern_residue - assert_raise(DigestError) { Digest.new(@entry, "X", 4) } + def test_Digest_each_digest_dont_raise_on_ok_pattern_residue + entry = Seq.new("test", "atcg") + assert_nothing_raised { entry.each_digest("AGCUTRYWSMKHDVBNagcutrywsmkhdvbn", 4) } end - def test_Digest_new_dont_raise_on_ok_pattern_residue - assert_nothing_raised { Digest.new(@entry, "AGCUTRYWSMKHDVBNagcutrywsmkhdvbn", 4) } + def test_Digest_each_digest_with_no_match_returns_correctly + entry = Seq.new("test", "aaaaaaaaaaa") + assert_equal(entry.seq, entry.each_digest("CGCG", 1).first.seq) end - def test_Digest_each - @entry.seq = "aaaaTTTTbbbbTTTT" - digest = Digest.new(@entry, "TTNT", 1) - assert_equal("aaaaT", digest.first.seq) + def test_Digest_each_digest_with_matches_returns_correctly + entry = Seq.new("test", "TTTTaaaaTTTTbbbbTTTT") + seqs = entry.each_digest("TTNT", 1) + assert_equal("T", seqs[0].seq) + assert_equal("TTTaaaaT", seqs[1].seq) + assert_equal("TTTbbbbT", seqs[2].seq) + assert_equal("TTT", seqs[3].seq) end end