--- /dev/null
+# Copyright (C) 2007-2011 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This software is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+require 'seq'
+
+# Error class for all exceptions to do with Digest.
+class DigestError < StandardError; end
+
+# Enumerates the fragments obtained by cutting a sequence at every match of
+# a restriction enzyme recognition pattern. Enumerable is mixed in, so its
+# methods are driven by #each below.
+class Digest
+ include Enumerable
+
+ # Initialize a digest object with the following arguments:
+ # - seq: A sequence object. The code below calls seq, seq_name, type and
+ #   length on it.
+ # - pattern: A restriction enzyme recognition pattern. It is converted to a
+ #   Regexp by disambiguate, which raises DigestError on an unknown residue.
+ # - cut_pos: Offset from match position where the enzyme cuts.
+ def initialize(seq, pattern, cut_pos)
+ @seq = seq
+ @pattern = disambiguate(pattern)
+ @cut_pos = cut_pos
+ # Start index of the next fragment; advanced by each as cuts are found.
+ @offset = 0
+ end
+
+ # Yields every digestion product of the sequence as a new Seq object named
+ # "<seq_name>[<start>-<stop>]", then returns self. The remainder after the
+ # last cut is always yielded, so a sequence without any match yields a
+ # single product.
+ #
+ # NOTE(review): @offset is only set to 0 in initialize, so a second call
+ # to each (including one made through an Enumerable method) starts from
+ # the offset left behind by the previous call - confirm this is intended.
+ def each
+ # Matching is done on an upcased copy, while the fragments below are
+ # sliced from the original @seq.seq, so the original case is preserved.
+ @seq.seq.upcase.scan @pattern do
+ # $` is the text preceding the current match, so its length is the
+ # index where the match starts. pos is the last index of the fragment.
+ pos = $`.length + @cut_pos - 1
+
+ # NOTE(review): a cut with pos outside 0 .. length - 3 yields no
+ # fragment, yet @offset is still advanced below - confirm the bound.
+ if pos >= 0 and pos < @seq.length - 2
+ seq = Seq.new("#{@seq.seq_name}[#{@offset}-#{pos}]", @seq.seq[@offset .. pos], @seq.type)
+
+ yield seq
+ end
+
+ @offset = pos + 1
+ end
+
+ # An offset pushed outside the sequence by the last cut is reset to 0,
+ # in which case the final product below starts at the sequence start.
+ if @offset < 0
+ @offset = 0
+ elsif @offset > @seq.length
+ @offset = 0
+ end
+
+ # Final product: from the current offset to the end of the sequence.
+ seq = Seq.new("#{@seq.seq_name}[#{@offset}-#{@seq.length - 1}]", @seq.seq[@offset .. @seq.length], @seq.type)
+
+ yield seq
+
+ self # conventionally
+ end
+
+ private
+
+ # Method that returns a regexp object with a restriction
+ # enzyme pattern with ambiguity codes substituted to the
+ # appropriate regexp. The pattern is upcased first, so lower case
+ # input is accepted. Raises DigestError on a character that is not
+ # a key of the table below.
+ def disambiguate(pattern)
+ # Residue code => regexp fragment. Note that 'U' is matched as "T".
+ ambiguity = {
+ 'A' => "A",
+ 'T' => "T",
+ 'U' => "T",
+ 'C' => "C",
+ 'G' => "G",
+ 'M' => "[AC]",
+ 'R' => "[AG]",
+ 'W' => "[AT]",
+ 'S' => "[CG]",
+ 'Y' => "[CT]",
+ 'K' => "[GT]",
+ 'V' => "[ACG]",
+ 'H' => "[ACT]",
+ 'D' => "[AGT]",
+ 'B' => "[CGT]",
+ 'N' => "[GATC]"
+ }
+
+ new_pattern = ""
+
+ pattern.upcase.each_char do |char|
+ if ambiguity.has_key? char
+ new_pattern << ambiguity[char]
+ else
+ raise DigestError, "Could not disambiguate residue: #{char}"
+ end
+ end
+
+ Regexp.new(new_pattern)
+ end
+end
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
require 'amatch'
+require 'digest'
# Residue alphabets
DNA = %w[a t c g]
end
end
-# Error class for all exceptions to do with Digest.
-class DigestError < StandardError; end
-
-class Digest
- include Enumerable
-
- # Initialize a digest object with the following arguments:
- # - seq: A sequence object.
- # - pattern: A restriction enzyme recognition pattern.
- # - cut_pos: Offset from match position where the enzyme cuts.
- def initialize(seq, pattern, cut_pos)
- @seq = seq
- @pattern = disambiguate(pattern)
- @cut_pos = cut_pos
- @offset = 0
- end
-
- # Method to get the next digestion product from a sequence.
- def each
- @seq.seq.upcase.scan @pattern do
- pos = $`.length + @cut_pos - 1
-
- if pos >= 0 and pos < @seq.length - 2
- seq = Seq.new("#{@seq.seq_name}[#{@offset}-#{pos}]", @seq.seq[@offset .. pos], @seq.type)
-
- yield seq
- end
-
- @offset = pos + 1
- end
-
- if @offset < 0
- @offset = 0
- elsif @offset > @seq.length
- @offset = 0
- end
-
- seq = Seq.new("#{@seq.seq_name}[#{@offset}-#{@seq.length - 1}]", @seq.seq[@offset .. @seq.length], @seq.type)
-
- yield seq
-
- self # conventionally
- end
-
- private
-
- # Method that returns a regexp object with a restriction
- # enzyme pattern with ambiguity codes substituted to the
- # appropriate regexp.
- def disambiguate(pattern)
- ambiguity = {
- 'A' => "A",
- 'T' => "T",
- 'U' => "T",
- 'C' => "C",
- 'G' => "G",
- 'M' => "[AC]",
- 'R' => "[AG]",
- 'W' => "[AT]",
- 'S' => "[CG]",
- 'Y' => "[CT]",
- 'K' => "[GT]",
- 'V' => "[ACG]",
- 'H' => "[ACT]",
- 'D' => "[AGT]",
- 'B' => "[CGT]",
- 'N' => "[GATC]"
- }
-
- new_pattern = ""
-
- pattern.upcase.each_char do |char|
- if ambiguity.has_key? char
- new_pattern << ambiguity[char]
- else
- raise DigestError, "Could not disambiguate residue: #{char}"
- end
- end
-
- Regexp.new(new_pattern)
- end
-end
-
__END__