require 'maasha/biopieces'
require 'maasha/seq'
-require 'maasha/digest'
casts = []
casts << {:long=>'pattern', :short=>'p', :type=>'string', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil}
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
if record.has_key? :SEQ_NAME and record.has_key? :SEQ
- seq = Seq.new_bp(record)
- digest = Digest.new(seq, options[:pattern].to_s, options[:cut_pos])
+ seq = Seq.new_bp(record)
- digest.each do |subseq|
- new_record = subseq.to_bp
+ seq.each_digest(options[:pattern].to_s, options[:cut_pos]) do |digest|
+ new_record = digest.to_bp
if new_record[:SEQ_NAME] =~ /\[(\d+)-(\d+)\]$/
s_beg = $1
new_record[:S_BEG] = s_beg
new_record[:S_END] = s_end
new_record[:REC_TYPE] = "DIGEST"
+
output.puts new_record
end
else
# Error class for all exceptions to do with Digest.
class DigestError < StandardError; end
-class Digest
- include Enumerable
-
- # Initialize a digest object with the following arguments:
- # - seq: A sequence object.
- # - pattern: A restriction enzyme recognition pattern.
- # - cut_pos: Offset from match position where the enzyme cuts.
- def initialize(seq, pattern, cut_pos)
- @seq = seq
- @pattern = disambiguate(pattern)
- @cut_pos = cut_pos
- @offset = 0
- end
-
+module Digest
# Method to get the next digestion product from a sequence.
- def each
- @seq.seq.upcase.scan @pattern do
- pos = $`.length + @cut_pos - 1
-
- if pos >= 0 and pos < @seq.length - 2
- seq = Seq.new("#{@seq.seq_name}[#{@offset}-#{pos}]", @seq.seq[@offset .. pos], @seq.type)
-
- yield seq
+ def each_digest(pattern, cut_pos)
+ pattern = disambiguate(pattern)
+ results = []
+ offset = 0
+
+ self.seq.upcase.scan pattern do
+ pos = $`.length + cut_pos
+
+ if pos >= 0 and pos < self.length - 2
+ subseq = self.subseq(offset, pos - offset)
+ subseq.seq_name = "#{self.seq_name}[#{offset}-#{pos - offset - 1}]"
+
+ block_given? ? (yield subseq) : (results << subseq)
end
- @offset = pos + 1
+ offset = pos
end
- if @offset < 0
- @offset = 0
- elsif @offset > @seq.length
- @offset = 0
+ if offset < 0 or offset > self.length
+ offset = 0
end
- seq = Seq.new("#{@seq.seq_name}[#{@offset}-#{@seq.length - 1}]", @seq.seq[@offset .. @seq.length], @seq.type)
+ subseq = self.subseq(offset)
+ subseq.seq_name = "#{self.seq_name}[#{offset}-#{self.length - 1}]"
- yield seq
+ block_given? ? (yield subseq) : (results << subseq)
- self # conventionally
+ return results unless block_given?
end
private
require 'maasha/patternmatcher'
require 'maasha/bits'
require 'maasha/backtrack'
+require 'maasha/digest'
#require 'maasha/patscan'
# Residue alphabets
#include Patscan
include PatternMatcher
include BackTrack
+ include Digest
attr_accessor :seq_name, :seq, :type, :qual
#!/usr/bin/env ruby
$:.unshift File.join(File.dirname(__FILE__),'..','lib')
-# Copyright (C) 2007-2010 Martin A. Hansen.
+# Copyright (C) 2007-2011 Martin A. Hansen.
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
require 'maasha/seq'
-require 'maasha/digest'
require 'test/unit'
require 'pp'
class TestDigest < Test::Unit::TestCase
- def setup
- @entry = Seq.new
+ def test_Digest_each_digest_raises_on_bad_pattern_residue
+ entry = Seq.new
+ assert_raise(DigestError) { entry.each_digest("X", 4) }
end
- def test_Digest_new_raises_on_bad_pattern_residue
- assert_raise(DigestError) { Digest.new(@entry, "X", 4) }
+ def test_Digest_each_digest_dont_raise_on_ok_pattern_residue
+ entry = Seq.new("test", "atcg")
+ assert_nothing_raised { entry.each_digest("AGCUTRYWSMKHDVBNagcutrywsmkhdvbn", 4) }
end
- def test_Digest_new_dont_raise_on_ok_pattern_residue
- assert_nothing_raised { Digest.new(@entry, "AGCUTRYWSMKHDVBNagcutrywsmkhdvbn", 4) }
+ def test_Digest_each_digest_with_no_match_returns_correctly
+ entry = Seq.new("test", "aaaaaaaaaaa")
+ assert_equal(entry.seq, entry.each_digest("CGCG", 1).first.seq)
end
- def test_Digest_each
- @entry.seq = "aaaaTTTTbbbbTTTT"
- digest = Digest.new(@entry, "TTNT", 1)
- assert_equal("aaaaT", digest.first.seq)
+ def test_Digest_each_digest_with_matches_returns_correctly
+ entry = Seq.new("test", "TTTTaaaaTTTTbbbbTTTT")
+ seqs = entry.each_digest("TTNT", 1)
+ assert_equal("T", seqs[0].seq)
+ assert_equal("TTTaaaaT", seqs[1].seq)
+ assert_equal("TTTbbbbT", seqs[2].seq)
+ assert_equal("TTT", seqs[3].seq)
end
end