]> git.donarmstrong.com Git - biopieces.git/commitdiff
fixed digest.rb
authormartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Tue, 15 Nov 2011 19:40:12 +0000 (19:40 +0000)
committermartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Tue, 15 Nov 2011 19:40:12 +0000 (19:40 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1649 74ccb610-7750-0410-82ae-013aeee3265d

bp_bin/digest_seq
code_ruby/lib/maasha/digest.rb
code_ruby/lib/maasha/seq.rb
code_ruby/test/maasha/test_digest.rb

index e3a7c4fb4ebfc66a3666bd4569f4f275065f57e9..b4451342da493e456e0f25e9521fa7371a7f8fdd 100755 (executable)
@@ -30,7 +30,6 @@
 
 require 'maasha/biopieces'
 require 'maasha/seq'
-require 'maasha/digest'
 
 casts = []
 casts << {:long=>'pattern', :short=>'p', :type=>'string', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil}
@@ -41,11 +40,10 @@ options = Biopieces.options_parse(ARGV, casts)
 Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
   input.each_record do |record|
     if record.has_key? :SEQ_NAME and record.has_key? :SEQ
-      seq    = Seq.new_bp(record)
-      digest = Digest.new(seq, options[:pattern].to_s, options[:cut_pos])
+      seq = Seq.new_bp(record)
 
-      digest.each do |subseq|
-        new_record = subseq.to_bp
+      seq.each_digest(options[:pattern].to_s, options[:cut_pos]) do |digest|
+        new_record = digest.to_bp
 
         if new_record[:SEQ_NAME] =~ /\[(\d+)-(\d+)\]$/
           s_beg = $1
@@ -55,6 +53,7 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
         new_record[:S_BEG]    = s_beg 
         new_record[:S_END]    = s_end
         new_record[:REC_TYPE] = "DIGEST"
+
         output.puts new_record
       end
     else
index 727e51c6905b9f452b536224f18bb6bc8b700e58..835e8c0a115a68054a56ef40ba47a3ad82342cfc 100644 (file)
 # Error class for all exceptions to do with Digest.
 class DigestError < StandardError; end
 
-class Digest
-  include Enumerable
-
-  # Initialize a digest object with the following arguments:
-  # - seq: A sequence object.
-  # - pattern: A restriction enzyme recognition pattern.
-  # - cut_pos: Offset from match position where the enzyme cuts.
-  def initialize(seq, pattern, cut_pos)
-    @seq     = seq
-    @pattern = disambiguate(pattern)
-    @cut_pos = cut_pos
-    @offset  = 0
-  end
-
+module Digest
   # Method to get the next digestion product from a sequence.
-  def each
-    @seq.seq.upcase.scan @pattern do
-      pos = $`.length + @cut_pos - 1
-     
-      if pos >= 0 and pos < @seq.length - 2
-        seq = Seq.new("#{@seq.seq_name}[#{@offset}-#{pos}]", @seq.seq[@offset .. pos], @seq.type)
-
-        yield seq
+  def each_digest(pattern, cut_pos)
+    pattern = disambiguate(pattern)
+    results = []
+    offset  = 0
+
+    self.seq.upcase.scan pattern do
+      pos = $`.length + cut_pos 
+
+      if pos >= 0 and pos < self.length - 2
+        subseq = self.subseq(offset, pos - offset)
+        subseq.seq_name = "#{self.seq_name}[#{offset}-#{pos - offset - 1}]"
+
+        block_given? ? (yield subseq) : (results << subseq)
       end
 
-      @offset = pos + 1
+      offset = pos 
     end
 
-    if @offset < 0
-      @offset = 0
-    elsif @offset > @seq.length
-      @offset = 0
+    if offset < 0 or offset > self.length
+      offset = 0
     end
 
-    seq = Seq.new("#{@seq.seq_name}[#{@offset}-#{@seq.length - 1}]", @seq.seq[@offset .. @seq.length], @seq.type)
+    subseq = self.subseq(offset)
+    subseq.seq_name = "#{self.seq_name}[#{offset}-#{self.length - 1}]"
 
-    yield seq
+    block_given? ? (yield subseq) : (results << subseq)
 
-    self # conventionally
+    return results unless block_given?
   end
 
   private
index 955f8c83f0dd017fb1f5d2d3c60ba704e6c2ec0d..598d0a2cd5056cd0786735009ba4eb59571d04fd 100644 (file)
@@ -25,6 +25,7 @@
 require 'maasha/patternmatcher'
 require 'maasha/bits'
 require 'maasha/backtrack'
+require 'maasha/digest'
 #require 'maasha/patscan'
 
 # Residue alphabets
@@ -46,6 +47,7 @@ class Seq
   #include Patscan
   include PatternMatcher
   include BackTrack
+  include Digest
 
   attr_accessor :seq_name, :seq, :type, :qual
 
index 0916c1f466a1fb3479914bae4f163f1704a7b4a7..979e081b44e7a8bf4a5ce908b23eb4806d9966a5 100755 (executable)
@@ -1,7 +1,7 @@
 #!/usr/bin/env ruby
 $:.unshift File.join(File.dirname(__FILE__),'..','lib')
 
-# Copyright (C) 2007-2010 Martin A. Hansen.
+# Copyright (C) 2007-2011 Martin A. Hansen.
 
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
@@ -26,27 +26,32 @@ $:.unshift File.join(File.dirname(__FILE__),'..','lib')
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
 require 'maasha/seq'
-require 'maasha/digest'
 require 'test/unit'
 require 'pp'
 
 class TestDigest < Test::Unit::TestCase 
-  def setup
-    @entry = Seq.new
+  def test_Digest_each_digest_raises_on_bad_pattern_residue
+    entry = Seq.new
+    assert_raise(DigestError) { entry.each_digest("X", 4) }
   end
 
-  def test_Digest_new_raises_on_bad_pattern_residue
-    assert_raise(DigestError) { Digest.new(@entry, "X", 4) }
+  def test_Digest_each_digest_dont_raise_on_ok_pattern_residue
+    entry = Seq.new("test", "atcg")
+    assert_nothing_raised { entry.each_digest("AGCUTRYWSMKHDVBNagcutrywsmkhdvbn", 4) }
   end
 
-  def test_Digest_new_dont_raise_on_ok_pattern_residue
-    assert_nothing_raised { Digest.new(@entry, "AGCUTRYWSMKHDVBNagcutrywsmkhdvbn", 4) }
+  def test_Digest_each_digest_with_no_match_returns_correctly
+    entry = Seq.new("test", "aaaaaaaaaaa")
+    assert_equal(entry.seq, entry.each_digest("CGCG", 1).first.seq)
   end
 
-  def test_Digest_each
-    @entry.seq = "aaaaTTTTbbbbTTTT"
-    digest = Digest.new(@entry, "TTNT", 1)
-    assert_equal("aaaaT", digest.first.seq)
+  def test_Digest_each_digest_with_matches_returns_correctly
+    entry = Seq.new("test", "TTTTaaaaTTTTbbbbTTTT")
+    seqs  = entry.each_digest("TTNT", 1)
+    assert_equal("T", seqs[0].seq)
+    assert_equal("TTTaaaaT", seqs[1].seq)
+    assert_equal("TTTbbbbT", seqs[2].seq)
+    assert_equal("TTT",      seqs[3].seq)
   end
 end