From: martinahansen Date: Mon, 20 Sep 2010 07:54:22 +0000 (+0000) Subject: added digest_seq Biopiece X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=3e586e24be9e52065055932934020f3f0fd31794;p=biopieces.git added digest_seq Biopiece git-svn-id: http://biopieces.googlecode.com/svn/trunk@1094 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_bin/digest_seq b/bp_bin/digest_seq new file mode 100755 index 0000000..3fab531 --- /dev/null +++ b/bp_bin/digest_seq @@ -0,0 +1,63 @@ +#!/usr/bin/env ruby + +# Copyright (C) 2007-2010 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# This program is part of the Biopieces framework (www.biopieces.org). + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Splits sequences in the stream at a given restriction enzyme's cleavage points. + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +require 'biopieces' +require 'fasta' +require 'seq' + +casts = [] +casts << {:long=>'pattern', :short=>'p', :type=>'string', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'cut_pos', :short=>'c', :type=>'int', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil} + +bp = Biopieces.new + +options = bp.parse(ARGV, casts) + +bp.each_record do |record| + bp.puts record + + if record.has_key? :SEQ_NAME and record.has_key? :SEQ + seq = Seq.new(record[:SEQ_NAME], record[:SEQ]) + digest = Digest.new(seq, options[:pattern].to_s, options[:cut_pos]) + + digest.each do |subseq| + new_record = subseq.to_bp + new_record[:REC_TYPE] = "DIGEST" + bp.puts new_record + end + end +end + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +__END__ diff --git a/bp_bin/pcr_seq b/bp_bin/pcr_seq index ff930cc..d3ce39a 100755 --- a/bp_bin/pcr_seq +++ b/bp_bin/pcr_seq @@ -51,7 +51,7 @@ class Pcr outfile = pat_file.sub("pat", "fna") command = "scan_for_matches" - command << " -c" + # command << " -c" command << " #{pat_file}" command << " < #{@infile}" command << " > #{outfile}" diff --git a/code_ruby/Maasha/lib/seq.rb b/code_ruby/Maasha/lib/seq.rb index 68d6bad..8b98ace 100644 --- a/code_ruby/Maasha/lib/seq.rb +++ b/code_ruby/Maasha/lib/seq.rb @@ -141,6 +141,8 @@ end class DigestError < StandardError; end class Digest + include Enumerable + # Initialize a digest object with the following arguments: # - seq: A sequence object. # - pattern: A restriction enzyme recognition pattern. @@ -149,42 +151,31 @@ class Digest @seq = seq @pattern = disambiguate(pattern) @cut_pos = cut_pos + @offset = 0 end - # Method that returns a list of positions where - # a specified restriction enzyme will cut the sequence. - def positions - positions = [] - - @seq.seq.scan @pattern do |match| + # Method to get the next digestion product from a sequence. + def each + @seq.seq.scan @pattern do pos = $`.length + @cut_pos - 1 - + if pos >= 0 and pos < @seq.seq.length - 2 - positions << pos - end - end + seq = Seq.new("#{@seq.seq_name}[#{@offset + 1}-#{pos + 1}]", @seq.seq[@offset .. pos], @seq.type) - positions - end - - # Method that returns a list of strings with digestion produts - # from the digestion of a specified sequence with a specified - # restriction enzyme. - def products - products = [] - - beg = 0 + yield seq + end - self.positions.each do |pos| - products << @seq.seq[beg .. pos] - beg = pos + 1 + @offset = pos + 1 end - if beg < @seq.seq.length - products << @seq.seq[beg .. @seq.seq.length] + # Handle remaining sequence after last cut. + if @offset > 0 and @offset < @seq.seq.length + seq = Seq.new("#{@seq.seq_name}[#{@offset + 1}-#{@seq.seq.length}]", @seq.seq[@offset .. @seq.seq.length], @seq.type) + + yield seq end - products + self # conventionally end private diff --git a/code_ruby/Maasha/test/test_seq.rb b/code_ruby/Maasha/test/test_seq.rb index 2e003e2..a8e8166 100755 --- a/code_ruby/Maasha/test/test_seq.rb +++ b/code_ruby/Maasha/test/test_seq.rb @@ -185,34 +185,10 @@ class TestSeq < Test::Unit::TestCase assert_nothing_raised { Digest.new(@entry, "AGCUTRYWSMKHDVBNagcutrywsmkhdvbn", 4) } end - def test_Digest_positions_return_correctly_at_begin - @entry.seq = "TTTTaaa" - digest = Digest.new(@entry, "TTTT", 0) - assert_equal([], digest.positions) - end - - def test_Digest_positions_return_correctly - @entry.seq = "TTTTaaa" - digest = Digest.new(@entry, "TTTT", 1) - assert_equal([0], digest.positions) - end - - def test_Digest_positions_return_correctly_at_end - @entry.seq = "aaaTTTT" - digest = Digest.new(@entry, "TTTT", 3) - assert_equal([], digest.positions) - end - - def test_Digest_positions_return_correctly_with_ambibuity_pattern - @entry.seq = "aaaTTTT" - digest = Digest.new(@entry, "TTNT", 1) - assert_equal([3], digest.positions) - end - - def test_Digest_products_return_correctly + def test_Digest_each @entry.seq = "aaaaTTTTbbbbTTTT" digest = Digest.new(@entry, "TTNT", 1) - assert_equal(["aaaaT", "TTTbbbbT", "TTT"], digest.products) + assert_equal("aaaaT", digest.first.seq) end end