]> git.donarmstrong.com Git - biopieces.git/commitdiff
added digest_seq Biopiece
authormartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Mon, 20 Sep 2010 07:54:22 +0000 (07:54 +0000)
committermartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Mon, 20 Sep 2010 07:54:22 +0000 (07:54 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1094 74ccb610-7750-0410-82ae-013aeee3265d

bp_bin/digest_seq [new file with mode: 0755]
bp_bin/pcr_seq
code_ruby/Maasha/lib/seq.rb
code_ruby/Maasha/test/test_seq.rb

diff --git a/bp_bin/digest_seq b/bp_bin/digest_seq
new file mode 100755 (executable)
index 0000000..3fab531
--- /dev/null
@@ -0,0 +1,63 @@
+#!/usr/bin/env ruby
+
+# Copyright (C) 2007-2010 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This program is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Splits sequences in the stream at a given restriction enzyme's cleavage points.
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+require 'biopieces'
+require 'fasta'
+require 'seq'
+
+casts = []
+casts << {:long=>'pattern', :short=>'p', :type=>'string', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'cut_pos', :short=>'c', :type=>'int',    :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil}
+
+bp = Biopieces.new
+
+options = bp.parse(ARGV, casts)
+
+bp.each_record do |record|
+  bp.puts record
+
+  if record.has_key? :SEQ_NAME and record.has_key? :SEQ
+    seq    = Seq.new(record[:SEQ_NAME], record[:SEQ])
+    digest = Digest.new(seq, options[:pattern].to_s, options[:cut_pos])
+
+    digest.each do |subseq|
+      new_record            = subseq.to_bp
+      new_record[:REC_TYPE] = "DIGEST"
+      bp.puts new_record
+    end
+  end
+end
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__
index ff930cca29e6201b18f273eb57fdb7d285859b3d..d3ce39a092a1fc6d68e08a8801c7eb9257b6d8a9 100755 (executable)
@@ -51,7 +51,7 @@ class Pcr
       outfile = pat_file.sub("pat", "fna")
 
       command =  "scan_for_matches"
-      command << " -c"
+      command << " -c"
       command << " #{pat_file}"
       command << " < #{@infile}"
       command << " > #{outfile}"
index 68d6bad7e727be263e81ab148cd29f515b49284f..8b98acee5d13bd1a4e344d37752f8b3569794033 100644 (file)
@@ -141,6 +141,8 @@ end
 class DigestError < StandardError; end
 
 class Digest
+  include Enumerable
+
   # Initialize a digest object with the following arguments:
   # - seq: A sequence object.
   # - pattern: A restriction enzyme recognition pattern.
@@ -149,42 +151,31 @@ class Digest
     @seq     = seq
     @pattern = disambiguate(pattern)
     @cut_pos = cut_pos
+    @offset  = 0
   end
 
-  # Method that returns a list of positions where
-  # a specified restriction enzyme will cut the sequence.
-  def positions
-    positions = []
-
-    @seq.seq.scan @pattern do |match|
+  # Method to get the next digestion product from a sequence.
+  def each
+    @seq.seq.scan @pattern do
       pos = $`.length + @cut_pos - 1
-
+     
       if pos >= 0 and pos < @seq.seq.length - 2
-        positions << pos
-      end
-    end
+        seq = Seq.new("#{@seq.seq_name}[#{@offset + 1}-#{pos + 1}]", @seq.seq[@offset .. pos], @seq.type)
 
-    positions
-  end
-
-  # Method that returns a list of strings with digestion produts
-  # from the digestion of a specified sequence with a specified 
-  # restriction enzyme.
-  def products
-    products = []
-
-    beg = 0
+        yield seq
+      end
 
-    self.positions.each do |pos|
-      products << @seq.seq[beg .. pos]
-      beg = pos + 1
+      @offset = pos + 1
     end
 
-    if beg < @seq.seq.length
-      products << @seq.seq[beg .. @seq.seq.length]
+    # Handle remaining sequence after last cut.
+    if @offset > 0 and @offset < @seq.seq.length
+      seq = Seq.new("#{@seq.seq_name}[#{@offset + 1}-#{@seq.seq.length}]", @seq.seq[@offset .. @seq.seq.length], @seq.type)
+
+      yield seq
     end
 
-    products
+    self # conventionally
   end
 
   private
index 2e003e2e17c6dc21d83441f4580383a134b5c13d..a8e81666f0bf03f1fdc35f6aaf8cf4281165165b 100755 (executable)
@@ -185,34 +185,10 @@ class TestSeq < Test::Unit::TestCase
     assert_nothing_raised { Digest.new(@entry, "AGCUTRYWSMKHDVBNagcutrywsmkhdvbn", 4) }
   end
 
-  def test_Digest_positions_return_correctly_at_begin
-    @entry.seq = "TTTTaaa"
-    digest = Digest.new(@entry, "TTTT", 0)
-    assert_equal([], digest.positions)
-  end
-
-  def test_Digest_positions_return_correctly
-    @entry.seq = "TTTTaaa"
-    digest = Digest.new(@entry, "TTTT", 1)
-    assert_equal([0], digest.positions)
-  end
-
-  def test_Digest_positions_return_correctly_at_end
-    @entry.seq = "aaaTTTT"
-    digest = Digest.new(@entry, "TTTT", 3)
-    assert_equal([], digest.positions)
-  end
-
-  def test_Digest_positions_return_correctly_with_ambibuity_pattern
-    @entry.seq = "aaaTTTT"
-    digest = Digest.new(@entry, "TTNT", 1)
-    assert_equal([3], digest.positions)
-  end
-
-  def test_Digest_products_return_correctly
+  def test_Digest_each
     @entry.seq = "aaaaTTTTbbbbTTTT"
     digest = Digest.new(@entry, "TTNT", 1)
-    assert_equal(["aaaaT", "TTTbbbbT", "TTT"], digest.products)
+    assert_equal("aaaaT", digest.first.seq)
   end
 end