--- /dev/null
+#!/usr/bin/env ruby
+
+# Copyright (C) 2007-2010 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This program is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Splits sequences in the stream at a given restriction enzyme's cleavage points.
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+require 'biopieces'
+require 'fasta'
+require 'seq'
+
+casts = []
+casts << {:long=>'pattern', :short=>'p', :type=>'string', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'cut_pos', :short=>'c', :type=>'int', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil}
+
+bp = Biopieces.new
+
+options = bp.parse(ARGV, casts)
+
+bp.each_record do |record|
+ bp.puts record
+
+ if record.has_key? :SEQ_NAME and record.has_key? :SEQ
+ seq = Seq.new(record[:SEQ_NAME], record[:SEQ])
+ digest = Digest.new(seq, options[:pattern].to_s, options[:cut_pos])
+
+ digest.each do |subseq|
+ new_record = subseq.to_bp
+ new_record[:REC_TYPE] = "DIGEST"
+ bp.puts new_record
+ end
+ end
+end
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__
outfile = pat_file.sub("pat", "fna")
command = "scan_for_matches"
- command << " -c"
+ # command << " -c"
command << " #{pat_file}"
command << " < #{@infile}"
command << " > #{outfile}"
class DigestError < StandardError; end
class Digest
+ include Enumerable
+
# Initialize a digest object with the following arguments:
# - seq: A sequence object.
# - pattern: A restriction enzyme recognition pattern.
@seq = seq
@pattern = disambiguate(pattern)
@cut_pos = cut_pos
+ @offset = 0
end
- # Method that returns a list of positions where
- # a specified restriction enzyme will cut the sequence.
- def positions
- positions = []
-
- @seq.seq.scan @pattern do |match|
+ # Method to get the next digestion product from a sequence.
+ def each
+ @seq.seq.scan @pattern do
pos = $`.length + @cut_pos - 1
-
+
if pos >= 0 and pos < @seq.seq.length - 2
- positions << pos
- end
- end
+ seq = Seq.new("#{@seq.seq_name}[#{@offset + 1}-#{pos + 1}]", @seq.seq[@offset .. pos], @seq.type)
- positions
- end
-
- # Method that returns a list of strings with digestion produts
- # from the digestion of a specified sequence with a specified
- # restriction enzyme.
- def products
- products = []
-
- beg = 0
+ yield seq
+ end
- self.positions.each do |pos|
- products << @seq.seq[beg .. pos]
- beg = pos + 1
+ @offset = pos + 1
end
- if beg < @seq.seq.length
- products << @seq.seq[beg .. @seq.seq.length]
+ # Handle remaining sequence after last cut.
+ if @offset > 0 and @offset < @seq.seq.length
+ seq = Seq.new("#{@seq.seq_name}[#{@offset + 1}-#{@seq.seq.length}]", @seq.seq[@offset .. @seq.seq.length], @seq.type)
+
+ yield seq
end
- products
+ self # conventionally
end
private
assert_nothing_raised { Digest.new(@entry, "AGCUTRYWSMKHDVBNagcutrywsmkhdvbn", 4) }
end
- def test_Digest_positions_return_correctly_at_begin
- @entry.seq = "TTTTaaa"
- digest = Digest.new(@entry, "TTTT", 0)
- assert_equal([], digest.positions)
- end
-
- def test_Digest_positions_return_correctly
- @entry.seq = "TTTTaaa"
- digest = Digest.new(@entry, "TTTT", 1)
- assert_equal([0], digest.positions)
- end
-
- def test_Digest_positions_return_correctly_at_end
- @entry.seq = "aaaTTTT"
- digest = Digest.new(@entry, "TTTT", 3)
- assert_equal([], digest.positions)
- end
-
- def test_Digest_positions_return_correctly_with_ambibuity_pattern
- @entry.seq = "aaaTTTT"
- digest = Digest.new(@entry, "TTNT", 1)
- assert_equal([3], digest.positions)
- end
-
- def test_Digest_products_return_correctly
+ def test_Digest_each
@entry.seq = "aaaaTTTTbbbbTTTT"
digest = Digest.new(@entry, "TTNT", 1)
- assert_equal(["aaaaT", "TTTbbbbT", "TTT"], digest.products)
+ assert_equal("aaaaT", digest.first.seq)
end
end