+++ /dev/null
-# Copyright (C) 2007-2011 Martin A. Hansen.
-
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
-# of the License, or (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-# http://www.gnu.org/copyleft/gpl.html
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-# This software is part of the Biopieces framework (www.biopieces.org).
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-# Error class for all exceptions to do with Digest.
-class DigestError < StandardError; end
-
-module Digest
- # Method to get the next digestion product from a sequence.
- def each_digest(pattern, cut_pos)
- pattern = disambiguate(pattern)
- results = []
- offset = 0
-
- self.seq.upcase.scan pattern do
- pos = $`.length + cut_pos
-
- if pos >= 0 and pos < self.length - 2
- subseq = self.subseq(offset, pos - offset)
- subseq.seq_name = "#{self.seq_name}[#{offset}-#{pos - offset - 1}]"
-
- block_given? ? (yield subseq) : (results << subseq)
- end
-
- offset = pos
- end
-
- if offset < 0 or offset > self.length
- offset = 0
- end
-
- subseq = self.subseq(offset)
- subseq.seq_name = "#{self.seq_name}[#{offset}-#{self.length - 1}]"
-
- block_given? ? (yield subseq) : (results << subseq)
-
- return results unless block_given?
- end
-
- private
-
- # Method that returns a regexp object with a restriction
- # enzyme pattern with ambiguity codes substituted to the
- # appropriate regexp.
- def disambiguate(pattern)
- ambiguity = {
- 'A' => "A",
- 'T' => "T",
- 'U' => "T",
- 'C' => "C",
- 'G' => "G",
- 'M' => "[AC]",
- 'R' => "[AG]",
- 'W' => "[AT]",
- 'S' => "[CG]",
- 'Y' => "[CT]",
- 'K' => "[GT]",
- 'V' => "[ACG]",
- 'H' => "[ACT]",
- 'D' => "[AGT]",
- 'B' => "[CGT]",
- 'N' => "[GATC]"
- }
-
- new_pattern = ""
-
- pattern.upcase.each_char do |char|
- if ambiguity.has_key? char
- new_pattern << ambiguity[char]
- else
- raise DigestError, "Could not disambiguate residue: #{char}"
- end
- end
-
- Regexp.new(new_pattern)
- end
-end
require 'maasha/patternmatcher'
require 'maasha/bits'
require 'maasha/backtrack'
-require 'maasha/digest'
+require 'maasha/seq/digest'
require 'maasha/seq/trim'
require 'narray'
#require 'maasha/patscan'
--- /dev/null
+# Copyright (C) 2007-2011 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This software is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Raised when a digest cannot be carried out, e.g. when the pattern
+# handed to Digest contains a residue that cannot be disambiguated.
+class DigestError < StandardError
+end
+
+# Mixin that digests a sequence at every occurrence of a restriction
+# enzyme pattern.
+#
+# The including object must respond to +seq+, +seq_name+, +length+ and
+# +subseq(start [, length])+, and the objects returned by +subseq+ must
+# respond to +seq_name=+.
+module Digest
+  # Nucleotide residue / ambiguity codes mapped to the regexp fragment
+  # that matches them. "U" is mapped to "T".
+  AMBIGUITY = {
+    'A' => "A",
+    'T' => "T",
+    'U' => "T",
+    'C' => "C",
+    'G' => "G",
+    'M' => "[AC]",
+    'R' => "[AG]",
+    'W' => "[AT]",
+    'S' => "[CG]",
+    'Y' => "[CT]",
+    'K' => "[GT]",
+    'V' => "[ACG]",
+    'H' => "[ACT]",
+    'D' => "[AGT]",
+    'B' => "[CGT]",
+    'N' => "[GATC]"
+  }.freeze
+
+  # Cuts the sequence at every match of +pattern+ and hands out the
+  # resulting fragments in order.
+  #
+  # pattern - String of residues and ambiguity codes (case-insensitive).
+  # cut_pos - Integer offset of the cut, counted from the start of each
+  #           match.
+  #
+  # Each fragment is obtained with +subseq+ and named
+  # "<seq_name>[<first>-<last>]", where first and last are the 0-based
+  # positions the fragment covers in the parent sequence. The part after
+  # the last accepted cut (the whole sequence when nothing was cut) is
+  # always emitted last.
+  #
+  # Yields each fragment and returns nil when a block is given; otherwise
+  # returns an Array holding the fragments.
+  #
+  # Raises DigestError if +pattern+ holds a residue that cannot be
+  # disambiguated.
+  def each_digest(pattern, cut_pos)
+    regex     = disambiguate(pattern)
+    fragments = []
+    offset    = 0
+
+    seq.upcase.scan(regex) do
+      # Start of the current match plus the requested cut offset.
+      pos = Regexp.last_match.begin(0) + cut_pos
+
+      # NOTE(review): the "length - 2" upper bound is kept as found; the
+      # reason for excluding cuts that close to the end is not visible
+      # here - confirm.
+      next unless pos >= 0 && pos < length - 2
+
+      fragment = subseq(offset, pos - offset)
+      # The fragment covers offset .. pos - 1 of the parent sequence.
+      fragment.seq_name = "#{seq_name}[#{offset}-#{pos - 1}]"
+
+      block_given? ? (yield fragment) : (fragments << fragment)
+
+      # Advance only past accepted cuts; moving the offset for a rejected
+      # cut would drop the residues between the previous cut and it.
+      offset = pos
+    end
+
+    # Offset is 0 or an accepted cut position here, so it is always a
+    # valid start for the trailing fragment.
+    fragment = subseq(offset)
+    fragment.seq_name = "#{seq_name}[#{offset}-#{length - 1}]"
+
+    block_given? ? (yield fragment) : (fragments << fragment)
+
+    fragments unless block_given?
+  end
+
+  private
+
+  # Translates a restriction enzyme pattern into a Regexp, replacing
+  # every residue with its entry in AMBIGUITY.
+  #
+  # pattern - String of residues and ambiguity codes (case-insensitive).
+  #
+  # Returns a Regexp.
+  # Raises DigestError on a residue that has no entry in AMBIGUITY.
+  def disambiguate(pattern)
+    source = pattern.upcase.each_char.map do |residue|
+      AMBIGUITY.fetch(residue) do
+        raise DigestError, "Could not disambiguate residue: #{residue}"
+      end
+    end
+
+    Regexp.new(source.join)
+  end
+end
require 'inline'
+# Error class for all exceptions to do with Trim.
class TrimError < StandardError; end
# Module containing methods for end trimming sequences with suboptimal quality
--- /dev/null
+#!/usr/bin/env ruby
+$:.unshift File.join(File.dirname(__FILE__),'..','lib')
+
+# Copyright (C) 2007-2011 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This software is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+require 'maasha/seq'
+require 'test/unit'
+require 'pp'
+
+# Unit tests for the Digest mixin as exposed through Seq#each_digest.
+class TestDigest < Test::Unit::TestCase
+  # A pattern residue that cannot be disambiguated must be rejected.
+  def test_Digest_each_digest_raises_on_bad_pattern_residue
+    blank = Seq.new
+    assert_raise(DigestError) do
+      blank.each_digest("X", 4)
+    end
+  end
+
+  # Every supported residue code, in either case, is accepted.
+  def test_Digest_each_digest_dont_raise_on_ok_pattern_residue
+    record = Seq.new("test", "atcg")
+    all_codes = "AGCUTRYWSMKHDVBNagcutrywsmkhdvbn"
+    assert_nothing_raised do
+      record.each_digest(all_codes, 4)
+    end
+  end
+
+  # Without any match the first product equals the full sequence.
+  def test_Digest_each_digest_with_no_match_returns_correctly
+    record = Seq.new("test", "aaaaaaaaaaa")
+    products = record.each_digest("CGCG", 1)
+    assert_equal(record.seq, products.first.seq)
+  end
+
+  # Three matches give four products, checked in order.
+  def test_Digest_each_digest_with_matches_returns_correctly
+    record = Seq.new("test", "TTTTaaaaTTTTbbbbTTTT")
+    products = record.each_digest("TTNT", 1)
+    ["T", "TTTaaaaT", "TTTbbbbT", "TTT"].each_with_index do |expected, index|
+      assert_equal(expected, products[index].seq)
+    end
+  end
+end
+
+
+__END__
+++ /dev/null
-#!/usr/bin/env ruby
-$:.unshift File.join(File.dirname(__FILE__),'..','lib')
-
-# Copyright (C) 2007-2011 Martin A. Hansen.
-
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
-# of the License, or (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-# http://www.gnu.org/copyleft/gpl.html
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-# This software is part of the Biopieces framework (www.biopieces.org).
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-require 'maasha/seq'
-require 'test/unit'
-require 'pp'
-
-class TestDigest < Test::Unit::TestCase
- def test_Digest_each_digest_raises_on_bad_pattern_residue
- entry = Seq.new
- assert_raise(DigestError) { entry.each_digest("X", 4) }
- end
-
- def test_Digest_each_digest_dont_raise_on_ok_pattern_residue
- entry = Seq.new("test", "atcg")
- assert_nothing_raised { entry.each_digest("AGCUTRYWSMKHDVBNagcutrywsmkhdvbn", 4) }
- end
-
- def test_Digest_each_digest_with_no_match_returns_correctly
- entry = Seq.new("test", "aaaaaaaaaaa")
- assert_equal(entry.seq, entry.each_digest("CGCG", 1).first.seq)
- end
-
- def test_Digest_each_digest_with_matches_returns_correctly
- entry = Seq.new("test", "TTTTaaaaTTTTbbbbTTTT")
- seqs = entry.each_digest("TTNT", 1)
- assert_equal("T", seqs[0].seq)
- assert_equal("TTTaaaaT", seqs[1].seq)
- assert_equal("TTTbbbbT", seqs[2].seq)
- assert_equal("TTT", seqs[3].seq)
- end
-end
-
-
-__END__