+# Residue alphabets
DNA = %w[a t c g]
RNA = %w[a u c g]
PROTEIN = %w[f l s y c w p h q r i m t n k v a d e g]
class Seq
attr_accessor :seq_name, :seq, :type, :qual
+ # Initialize a sequence object with the following arguments:
+ # - seq_name: Name of the sequence.
+ # - seq: The sequence.
+ # - type: The sequence type - DNA, RNA, or protein
+ # - qual: A Solexa type quality scores string.
def initialize(seq_name = nil, seq = nil, type = nil, qual = nil)
@seq_name = seq_name
@seq = seq
@qual = qual
end
+ # Returns the length of a sequence.
def length
self.seq.nil? ? 0 : self.seq.length
end
self.type == 'dna'
end
+ # Method that returns true is a given sequence type is RNA.
def is_rna?
self.type == 'rna'
end
+ # Method that returns true is a given sequence type is protein.
def is_protein?
self.type == 'protein'
end
end
end
+# Error class for all exceptions to do with Digest.
+class DigestError < StandardError; end
+
+class Digest
+ # Initialize a digest object with the following arguments:
+ # - seq: A sequence object.
+ # - pattern: A restriction enzyme recognition pattern.
+ # - cut_pos: Offset from match position where the enzyme cuts.
+ def initialize(seq, pattern, cut_pos)
+ @seq = seq
+ @pattern = disambiguate(pattern)
+ @cut_pos = cut_pos
+ end
+
+ # Method that returns a list of positions where
+ # a specified restriction enzyme will cut the sequence.
+ def positions
+ positions = []
+
+ @seq.seq.scan @pattern do |match|
+ pos = $`.length + @cut_pos - 1
+
+ if pos >= 0 and pos < @seq.seq.length - 2
+ positions << pos
+ end
+ end
+
+ positions
+ end
+
+ # Method that returns a list of strings with digestion produts
+ # from the digestion of a specified sequence with a specified
+ # restriction enzyme.
+ def products
+ products = []
+
+ beg = 0
+
+ self.positions.each do |pos|
+ products << @seq.seq[beg .. pos]
+ beg = pos + 1
+ end
+
+ if beg < @seq.seq.length
+ products << @seq.seq[beg .. @seq.seq.length]
+ end
+
+ products
+ end
+
+ private
+
+ # Method that returns a regexp object with a restriction
+ # enzyme pattern with ambiguity codes substituted to the
+ # appropriate regexp.
+ def disambiguate(pattern)
+ ambiguity = {
+ 'A' => "A",
+ 'T' => "T",
+ 'U' => "T",
+ 'C' => "C",
+ 'G' => "G",
+ 'M' => "[AC]",
+ 'R' => "[AG]",
+ 'W' => "[AT]",
+ 'S' => "[CG]",
+ 'Y' => "[CT]",
+ 'K' => "[GT]",
+ 'V' => "[ACG]",
+ 'H' => "[ACT]",
+ 'D' => "[AGT]",
+ 'B' => "[CGT]",
+ 'N' => "[GATC]"
+ }
+
+ new_pattern = ""
+
+ pattern.upcase.each_char do |char|
+ if ambiguity.has_key? char
+ new_pattern << ambiguity[char]
+ else
+ raise DigestError, "Could not disambiguate residue: #{char}"
+ end
+ end
+
+ Regexp.new(new_pattern)
+ end
+end
+
+
__END__
def test_Sequence_length_is_correct
@entry.seq = 'ATCG'
- assert_equal(@entry.length, 4)
+ assert_equal(4, @entry.length)
end
def test_Seq_to_rna_raises_if_no_sequence
def test_Seq_to_rna_transcribes_correctly
@entry.seq = 'ATCGatcg'
@entry.type = 'dna'
- assert_equal(@entry.to_rna, "AUCGaucg")
+ assert_equal("AUCGaucg", @entry.to_rna)
end
def test_Seq_to_rna_changes_entry_type_to_rna
@entry.seq = 'ATCGatcg'
@entry.type = 'dna'
@entry.to_rna
- assert_equal(@entry.type, "rna")
+ assert_equal("rna", @entry.type)
end
def test_Seq_to_dna_raises_if_no_sequence
def test_Seq_to_dna_transcribes_correctly
@entry.seq = 'AUCGaucg'
@entry.type = 'rna'
- assert_equal(@entry.to_dna, "ATCGatcg")
+ assert_equal("ATCGatcg", @entry.to_dna)
end
def test_Seq_to_dna_changes_entry_type_to_dna
@entry.seq = 'AUCGaucg'
@entry.type = 'rna'
@entry.to_dna
- assert_equal(@entry.type, "dna")
+ assert_equal("dna", @entry.type)
end
def test_Seq_to_bp_returns_correct_record
@entry.seq_name = 'test'
- @entry.seq = 'ATCG'
- assert_equal(@entry.to_bp, {"SEQ_NAME"=>"test", "SEQ"=>"ATCG", "SEQ_LEN"=>4})
+ @entry.seq = 'ATCG'
+ assert_equal({:SEQ_NAME=>"test", :SEQ=>"ATCG", :SEQ_LEN=>4}, @entry.to_bp)
end
def test_Seq_to_bp_raises_on_missing_seq_name
end
def test_Seq_to_fasta_returns_correct_entry
- entry = Seq.new( "test", "ATCG" )
- assert_equal(">test\nATCG\n", entry.to_fasta)
+ @entry.seq_name = 'test'
+ @entry.seq = 'ATCG'
+ assert_equal(">test\nATCG\n", @entry.to_fasta)
end
def test_Seq_to_fasta_wraps_correctly
- entry = Seq.new( "test", "ATCG" )
+ entry = Seq.new("test", "ATCG")
assert_equal(">test\nAT\nCG\n", entry.to_fasta(2))
end
+ def test_Seq_reverse_returns_correctly
+ @entry.seq = "ATCG"
+ assert_equal("GCTA", @entry.reverse)
+ end
+
def test_Seq_complement_raises_if_no_sequence
@entry.type = 'dna'
assert_raise(SeqError) { @entry.complement }
def test_Seq_complement_for_DNA_is_correct
@entry.seq = 'ATCGatcg'
@entry.type = 'dna'
- assert_equal(@entry.complement, "TAGCtagc")
+ assert_equal("TAGCtagc", @entry.complement)
end
def test_Seq_complement_for_RNA_is_correct
@entry.seq = 'AUCGaucg'
@entry.type = 'rna'
- assert_equal(@entry.complement, "UAGCuagc")
+ assert_equal("UAGCuagc", @entry.complement)
+ end
+
+ def test_Seq_reverse_complement_for_DNA_is_correct
+ @entry.seq = 'ATCGatcg'
+ @entry.type = 'dna'
+ assert_equal("cgatCGAT", @entry.reverse_complement)
+ end
+
+ def test_Seq_reverse_complement_for_RNA_is_correct
+ @entry.seq = 'AUCGaucg'
+ @entry.type = 'rna'
+ assert_equal("cgauCGAU", @entry.reverse_complement)
end
def test_Seq_generate_raises_if_length_lt_0
assert_nothing_raised { @entry.generate(10, type) }
end
end
+
+ def test_Digest_new_raises_on_bad_pattern_residue
+ assert_raise(DigestError) { Digest.new(@entry, "X", 4) }
+ end
+
+ def test_Digest_new_dont_raise_on_ok_pattern_residue
+ assert_nothing_raised { Digest.new(@entry, "AGCUTRYWSMKHDVBNagcutrywsmkhdvbn", 4) }
+ end
+
+ def test_Digest_positions_return_correctly_at_begin
+ @entry.seq = "TTTTaaa"
+ digest = Digest.new(@entry, "TTTT", 0)
+ assert_equal([], digest.positions)
+ end
+
+ def test_Digest_positions_return_correctly
+ @entry.seq = "TTTTaaa"
+ digest = Digest.new(@entry, "TTTT", 1)
+ assert_equal([0], digest.positions)
+ end
+
+ def test_Digest_positions_return_correctly_at_end
+ @entry.seq = "aaaTTTT"
+ digest = Digest.new(@entry, "TTTT", 3)
+ assert_equal([], digest.positions)
+ end
+
+ def test_Digest_positions_return_correctly_with_ambibuity_pattern
+ @entry.seq = "aaaTTTT"
+ digest = Digest.new(@entry, "TTNT", 1)
+ assert_equal([3], digest.positions)
+ end
+
+ def test_Digest_products_return_correctly
+ @entry.seq = "aaaaTTTTbbbbTTTT"
+ digest = Digest.new(@entry, "TTNT", 1)
+ assert_equal(["aaaaT", "TTTbbbbT", "TTT"], digest.products)
+ end
end