]> git.donarmstrong.com Git - biopieces.git/commitdiff
added Digest class to ruby code
authormartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Fri, 17 Sep 2010 09:59:00 +0000 (09:59 +0000)
committermartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Fri, 17 Sep 2010 09:59:00 +0000 (09:59 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1093 74ccb610-7750-0410-82ae-013aeee3265d

code_ruby/Maasha/lib/seq.rb
code_ruby/Maasha/test/test_seq.rb

index e00590c95254b4b3388e8be890ab66ed88f9eae4..68d6bad7e727be263e81ab148cd29f515b49284f 100644 (file)
@@ -1,3 +1,4 @@
+# Residue alphabets
 DNA     = %w[a t c g]
 RNA     = %w[a u c g]
 PROTEIN = %w[f l s y c w p h q r i m t n k v a d e g]
@@ -8,6 +9,11 @@ class SeqError < StandardError; end
 class Seq
   attr_accessor :seq_name, :seq, :type, :qual
 
+  # Initialize a sequence object with the following arguments:
+  # - seq_name: Name of the sequence.
+  # - seq: The sequence.
+  # - type: The sequence type - DNA, RNA, or protein
+  # - qual: A Solexa type quality scores string.
   def initialize(seq_name = nil, seq = nil, type = nil, qual = nil)
     @seq_name = seq_name
     @seq      = seq
@@ -15,6 +21,7 @@ class Seq
     @qual     = qual
   end
 
+  # Returns the length of a sequence.
   def length
     self.seq.nil? ? 0 : self.seq.length
   end
@@ -26,10 +33,12 @@ class Seq
     self.type == 'dna'
   end
 
+  # Method that returns true is a given sequence type is RNA.
   def is_rna?
     self.type == 'rna'
   end
 
+  # Method that returns true is a given sequence type is protein.
   def is_protein?
     self.type == 'protein'
   end
@@ -128,6 +137,96 @@ class Seq
   end
 end
 
+# Error class for all exceptions to do with Digest.
+class DigestError < StandardError; end
+
+class Digest
+  # Initialize a digest object with the following arguments:
+  # - seq: A sequence object.
+  # - pattern: A restriction enzyme recognition pattern.
+  # - cut_pos: Offset from match position where the enzyme cuts.
+  def initialize(seq, pattern, cut_pos)
+    @seq     = seq
+    @pattern = disambiguate(pattern)
+    @cut_pos = cut_pos
+  end
+
+  # Method that returns a list of positions where
+  # a specified restriction enzyme will cut the sequence.
+  def positions
+    positions = []
+
+    @seq.seq.scan @pattern do |match|
+      pos = $`.length + @cut_pos - 1
+
+      if pos >= 0 and pos < @seq.seq.length - 2
+        positions << pos
+      end
+    end
+
+    positions
+  end
+
+  # Method that returns a list of strings with digestion produts
+  # from the digestion of a specified sequence with a specified 
+  # restriction enzyme.
+  def products
+    products = []
+
+    beg = 0
+
+    self.positions.each do |pos|
+      products << @seq.seq[beg .. pos]
+      beg = pos + 1
+    end
+
+    if beg < @seq.seq.length
+      products << @seq.seq[beg .. @seq.seq.length]
+    end
+
+    products
+  end
+
+  private
+
+  # Method that returns a regexp object with a restriction
+  # enzyme pattern with ambiguity codes substituted to the
+  # appropriate regexp.
+  def disambiguate(pattern)
+    ambiguity = {
+      'A' => "A",
+      'T' => "T",
+      'U' => "T",
+      'C' => "C",
+      'G' => "G",
+      'M' => "[AC]",
+      'R' => "[AG]",
+      'W' => "[AT]",
+      'S' => "[CG]",
+      'Y' => "[CT]",
+      'K' => "[GT]",
+      'V' => "[ACG]",
+      'H' => "[ACT]",
+      'D' => "[AGT]",
+      'B' => "[CGT]",
+      'N' => "[GATC]"
+    }
+
+    new_pattern = ""
+
+    pattern.upcase.each_char do |char|
+      if ambiguity.has_key? char
+        new_pattern << ambiguity[char]
+      else
+        raise DigestError, "Could not disambiguate residue: #{char}"
+      end
+    end
+
+    Regexp.new(new_pattern)
+  end
+end
+
+
 __END__
 
 
index 52f14afa59a2bcf2ea424c1e0e70185aa1fbfda5..2e003e2e17c6dc21d83441f4580383a134b5c13d 100755 (executable)
@@ -44,7 +44,7 @@ class TestSeq < Test::Unit::TestCase
 
   def test_Sequence_length_is_correct
     @entry.seq = 'ATCG'
-    assert_equal(@entry.length, 4)
+    assert_equal(4, @entry.length)
   end
 
   def test_Seq_to_rna_raises_if_no_sequence
@@ -61,14 +61,14 @@ class TestSeq < Test::Unit::TestCase
   def test_Seq_to_rna_transcribes_correctly
     @entry.seq  = 'ATCGatcg'
     @entry.type = 'dna'
-    assert_equal(@entry.to_rna, "AUCGaucg")
+    assert_equal("AUCGaucg", @entry.to_rna)
   end
 
   def test_Seq_to_rna_changes_entry_type_to_rna
     @entry.seq  = 'ATCGatcg'
     @entry.type = 'dna'
     @entry.to_rna
-    assert_equal(@entry.type, "rna")
+    assert_equal("rna", @entry.type)
   end
 
   def test_Seq_to_dna_raises_if_no_sequence
@@ -85,20 +85,20 @@ class TestSeq < Test::Unit::TestCase
   def test_Seq_to_dna_transcribes_correctly
     @entry.seq  = 'AUCGaucg'
     @entry.type = 'rna'
-    assert_equal(@entry.to_dna, "ATCGatcg")
+    assert_equal("ATCGatcg", @entry.to_dna)
   end
 
   def test_Seq_to_dna_changes_entry_type_to_dna
     @entry.seq  = 'AUCGaucg'
     @entry.type = 'rna'
     @entry.to_dna
-    assert_equal(@entry.type, "dna")
+    assert_equal("dna", @entry.type)
   end
 
   def test_Seq_to_bp_returns_correct_record
     @entry.seq_name = 'test'
-    @entry.seq = 'ATCG'
-    assert_equal(@entry.to_bp, {"SEQ_NAME"=>"test", "SEQ"=>"ATCG", "SEQ_LEN"=>4})
+    @entry.seq      = 'ATCG'
+    assert_equal({:SEQ_NAME=>"test", :SEQ=>"ATCG", :SEQ_LEN=>4}, @entry.to_bp)
   end
 
   def test_Seq_to_bp_raises_on_missing_seq_name
@@ -112,15 +112,21 @@ class TestSeq < Test::Unit::TestCase
   end
 
   def test_Seq_to_fasta_returns_correct_entry
-    entry = Seq.new( "test", "ATCG" )
-    assert_equal(">test\nATCG\n", entry.to_fasta)
+    @entry.seq_name = 'test'
+    @entry.seq      = 'ATCG'
+    assert_equal(">test\nATCG\n", @entry.to_fasta)
   end
 
   def test_Seq_to_fasta_wraps_correctly
-    entry = Seq.new( "test", "ATCG" )
+    entry = Seq.new("test", "ATCG")
     assert_equal(">test\nAT\nCG\n", entry.to_fasta(2))
   end
 
+  def test_Seq_reverse_returns_correctly
+    @entry.seq = "ATCG"
+    assert_equal("GCTA", @entry.reverse)
+  end
+
   def test_Seq_complement_raises_if_no_sequence
     @entry.type = 'dna'
     assert_raise(SeqError) { @entry.complement }
@@ -135,13 +141,25 @@ class TestSeq < Test::Unit::TestCase
   def test_Seq_complement_for_DNA_is_correct
     @entry.seq  = 'ATCGatcg'
     @entry.type = 'dna'
-    assert_equal(@entry.complement, "TAGCtagc")
+    assert_equal("TAGCtagc", @entry.complement)
   end
 
   def test_Seq_complement_for_RNA_is_correct
     @entry.seq  = 'AUCGaucg'
     @entry.type = 'rna'
-    assert_equal(@entry.complement, "UAGCuagc")
+    assert_equal("UAGCuagc", @entry.complement)
+  end
+
+  def test_Seq_reverse_complement_for_DNA_is_correct
+    @entry.seq  = 'ATCGatcg'
+    @entry.type = 'dna'
+    assert_equal("cgatCGAT", @entry.reverse_complement)
+  end
+
+  def test_Seq_reverse_complement_for_RNA_is_correct
+    @entry.seq  = 'AUCGaucg'
+    @entry.type = 'rna'
+    assert_equal("cgauCGAU", @entry.reverse_complement)
   end
 
   def test_Seq_generate_raises_if_length_lt_0
@@ -158,6 +176,44 @@ class TestSeq < Test::Unit::TestCase
       assert_nothing_raised { @entry.generate(10, type) }
     end
   end
+
+  def test_Digest_new_raises_on_bad_pattern_residue
+    assert_raise(DigestError) { Digest.new(@entry, "X", 4) }
+  end
+
+  def test_Digest_new_dont_raise_on_ok_pattern_residue
+    assert_nothing_raised { Digest.new(@entry, "AGCUTRYWSMKHDVBNagcutrywsmkhdvbn", 4) }
+  end
+
+  def test_Digest_positions_return_correctly_at_begin
+    @entry.seq = "TTTTaaa"
+    digest = Digest.new(@entry, "TTTT", 0)
+    assert_equal([], digest.positions)
+  end
+
+  def test_Digest_positions_return_correctly
+    @entry.seq = "TTTTaaa"
+    digest = Digest.new(@entry, "TTTT", 1)
+    assert_equal([0], digest.positions)
+  end
+
+  def test_Digest_positions_return_correctly_at_end
+    @entry.seq = "aaaTTTT"
+    digest = Digest.new(@entry, "TTTT", 3)
+    assert_equal([], digest.positions)
+  end
+
+  def test_Digest_positions_return_correctly_with_ambibuity_pattern
+    @entry.seq = "aaaTTTT"
+    digest = Digest.new(@entry, "TTNT", 1)
+    assert_equal([3], digest.positions)
+  end
+
+  def test_Digest_products_return_correctly
+    @entry.seq = "aaaaTTTTbbbbTTTT"
+    digest = Digest.new(@entry, "TTNT", 1)
+    assert_equal(["aaaaT", "TTTbbbbT", "TTT"], digest.products)
+  end
 end