# Class containing generic sequence methods and nucleic acid and amino acid subclasses.
class Seq < String
- attr_accessor :seq, :seq_type
-
- # Method to initialize a new sequence.
- def initialize( seq = "", seq_type = nil )
- @seq = seq
- @seq_type = seq_type
- end
-
- # Method for outputting sequence as a String.
- def to_s
- @seq
- end
-
# Guess the sequence type by analyzing the first 100 residues allowing for ambiguity codes.
- def seq_type_guess
- seq_beg = @seq[ 0, 100 ].upcase
+ def guess_type
+ raise ArgumentError, "No sequence." if self.empty?
+
+ seq_beg = self[ 0, 100 ].upcase
if seq_beg.count( "FLPQIE" ) > 0
- :AA
+ Seq::AA.new( self )
elsif seq_beg.count( "U" ) > 0
- :RNA
+ Seq::NA::RNA.new( self )
else
- :DNA
+ Seq::NA::DNA.new( self )
end
end
- # Guess and replace the sequence type by analyzing the first 100 residues allowing for ambiguity codes.
- def seq_type_guess!
- @seq_type = seq_type_guess
- end
-
- # Method that return an array of the residue alphabet for a given sequence type.
- def seq_alph( seq_type )
- hash = {
- :DNA => %w{ A T C G },
- :RNA => %w{ A U C G },
- :AA => %w{ F L S Y C W P H Q R I M T N K V A D E G },
- }
-
- raise "ERROR: Sequence type '#{ seq_type }' not recognized." unless hash.include?( seq_type )
- return hash[ seq_type ]
- end
-
# Method to wrap a sequence to a given width using a given delimiter.
def wrap( width = 80, delimit = "\n" )
- raise "ERROR: Wrap width must be an integer." unless width.is_a? Fixnum
- raise "ERROR: Cannot wrap sequence to negative width: #{ width }." if width <= 0
- @seq.tr!( " \t\n\r", '' )
- @seq.gsub( /.{#{ width }}/, "\\0#{ delimit }" ).sub( /#{ delimit }$/, "" )
+ raise ArgumentError, "Wrap width must be an integer." unless width.is_a? Fixnum
+ raise ArgumentError, "Cannot wrap sequence to negative width: #{ width }." if width <= 0
+
+ self.delete!( " \t\n\r" )
+ self.gsub( /.{#{ width }}(?!$)/, "\\0#{ delimit }" )
end
# Method to wrap and replace a sequence to a given width using a given delimiter.
def wrap!( width = 80, delimit = "\n" )
- @seq = wrap( width, delimit )
+ self.replace( self.wrap( width, delimit ) )
end
# Method that generates a random sequence of a given length.
def generate( length )
- raise "ERROR: Length must be an integer." unless length.is_a? Fixnum
- raise "ERROR: Cannot generate negative sequence length: #{ length }." if length <= 0
+ raise ArgumentError, "Length must be an integer." unless length.is_a? Fixnum
+ raise ArgumentError, "Cannot generate negative sequence length: #{ length }." if length <= 0
- alph = seq_alph( @seq_type )
- seq = Array.new( length ) { alph[ rand( alph.size ) ] }.join
+ alph = self.residues
+ Array.new( length ) { alph[ rand( alph.size ) ] }.join( "" )
end
# Method that replaces sequence with a randomly generated sequence of a given length.
def generate!( length )
- @seq = generate( length )
+ self.replace( self.generate( length ) )
end
# Class containing methods specific for amino acid (AA) sequences.
class AA < Seq
- # Method to initialize a new amino acid sequence.
- def initialize( seq = "" )
- @seq = seq
- @seq_type = :AA
+ # Method that returns an array of amino acid residues.
+ def residues
+ %w{ F L S Y C W P H Q R I M T N K V A D E G }
end
# Calculate the molecular weight of an amino acid sequence.
# for amino bond formation and the MW used are somewhat imprecise:
# http://www.expasy.ch/tools/pscale/Molecularweight.html
def mol_weight
+ raise ArgumentError, "invalid residues found: #{self.delete("#{residues.join( "" )}")}" if self.upcase =~ /[^#{residues.join( "" )}]/
+
mol_weight_aa = {
"A" => 89.000, # Ala
"R" => 174.000, # Arg
mw = 0.0
- @seq.upcase.each_char do |c|
- raise "ERROR: Unknown amino acid: #{ c }" unless mol_weight_aa.include?( c )
- mw += mol_weight_aa[ c ]
- end
+ self.upcase.each_char { |c| mw += mol_weight_aa[ c ] }
mw
end
class NA < Seq
# Class containing methods specific for DNA sequences.
class DNA < NA
- # Method to initialize a new DNA sequence.
- def initialize( seq = "" )
- @seq = seq
- @seq_type = :DNA
+ # Method that returns an array of DNA residues.
+ def residues
+ %w{ A T C G }
+ end
+
+ # Method to transcribe DNA to RNA.
+ def to_RNA
+ Seq::NA::RNA.new( self.tr( 'Tt', 'Uu' ) )
end
# Method that complements DNA sequence including ambiguity codes.
def complement
- @seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn' )
+ self.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn' )
end
end
# Class containing methods specific for RNA sequences.
class RNA < NA
- # Method to initialize a new RNA sequence.
- def initialize( seq = "" )
- @seq = seq
- @seq_type = :RNA
+ # Method that returns an array of RNA residues.
+ def residues
+ %w{ A U C G }
+ end
+
+ # Method to reverse transcribe RNA to DNA.
+ def to_DNA
+ Seq::NA::DNA.new( self.tr( 'Uu', 'Tt' ) )
end
# Method that complements RNA sequence including ambiguity codes.
def complement
- @seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn' )
+ self.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn' )
end
end
end
# test marked for deletion - too simple and not informative
def test_Seq_initialize_with_0_args
s = Seq.new
- assert_equal( "", s.seq )
- assert_equal( nil, s.seq_type )
+ assert_equal( "", s.to_s )
end
# test marked for deletion - too simple and not informative
def test_Seq_initialize_with_1_args
s = Seq.new( "ATCG" )
- assert_equal( "ATCG", s.seq )
- assert_equal( nil, s.seq_type )
+ assert_equal( "ATCG", s.to_s )
end
- # test marked for deletion - too simple and informative
- def test_Seq_initialize_with_2_args
- s = Seq.new( "ATCG", :DNA )
- assert_equal( "ATCG", s.seq )
- assert_equal( :DNA, s.seq_type )
- end
-
# Testing Seq#to_s
def test_to_s
s = Seq.new( "ATCG" )
assert_equal( "ATCG", s.to_s )
end
- # Testing Seq#seq_type_guess
-
- def test_seq_type_guess_DNA_uppercase
- s = Seq.new( "ATCG" )
-
- assert_equal( :DNA, s.seq_type_guess )
- end
+ # Testing Seq#guess_type
- def test_seq_type_guess_DNA_lowercase
- s = Seq.new( "atcg" )
-
- assert_equal( :DNA, s.seq_type_guess )
- end
-
- def test_seq_type_guess_RNA_uppercase
- s = Seq.new( "AUCG" )
- assert_equal( :RNA, s.seq_type_guess )
- end
-
- def test_seq_type_guess_RNA_lowercase
- s = Seq.new( "aucg" )
- assert_equal( :RNA, s.seq_type_guess )
- end
-
- def test_seq_type_guess_AA_uppercase
- s = Seq.new( "SEQ" )
- assert_equal( :AA, s.seq_type_guess )
- end
-
- def test_seq_type_guess_AA_lowercase
- s = Seq.new( "seq" )
- assert_equal( :AA, s.seq_type_guess )
- end
-
- def test_seq_type_guess_dont_change_instance_var
- s = Seq.new( "seq" )
- s.seq_type_guess
- assert_equal( nil, s.seq_type )
- end
-
- # Testing Seq#seq_type_guess!
-
- def test_seq_type_guess_DNA_uppercase!
- s = Seq.new( "ATCG" )
- s.seq_type_guess!
- assert_equal( :DNA, s.seq_type )
- end
+ def test_guess_type_raise_if_no_sequence
+ s = Seq.new
- def test_seq_type_guess_DNA_lowercase!
- s = Seq.new( "atcg" )
- s.seq_type_guess!
- assert_equal( :DNA, s.seq_type )
+ assert_raise( ArgumentError ) { s.guess_type }
end
- def test_seq_type_guess_RNA_uppercase!
- s = Seq.new( "AUCG" )
- s.seq_type_guess!
- assert_equal( :RNA, s.seq_type )
- end
+ def test_guess_type_AA_uppercase
+ s1 = Seq.new( "SEQ" )
+ s2 = Seq::AA.new( "SEQ" )
- def test_seq_type_guess_RNA_lowercase!
- s = Seq.new( "aucg" )
- s.seq_type_guess!
- assert_equal( :RNA, s.seq_type )
+ assert_equal( s1.guess_type.class, s2.class )
end
- def test_seq_type_guess_AA_uppercase!
- s = Seq.new( "SEQ" )
- s.seq_type_guess!
- assert_equal( :AA, s.seq_type )
- end
+ def test_guess_type_AA_lowercase
+ s1 = Seq.new( "seq" )
+ s2 = Seq::AA.new( "seq" )
- def test_seq_type_guess_AA_lowercase!
- s = Seq.new( "seq" )
- s.seq_type_guess!
- assert_equal( :AA, s.seq_type )
+ assert_equal( s1.guess_type.class, s2.class )
end
- # Testing Seq#seq_alph
-
- def test_seq_alph_DNA
- s = Seq.new
+ def test_guess_type_DNA_uppercase
+ s1 = Seq.new( "ATCG" )
+ s2 = Seq::NA::DNA.new( "ATCG" )
- assert_equal( %w{ A T C G }, s.seq_alph( :DNA ) )
+ assert_equal( s1.guess_type.class, s2.class )
end
- def test_seq_alph_RNA
- s = Seq.new
+ def test_guess_type_DNA_lowercase
+ s1 = Seq.new( "atcg" )
+ s2 = Seq::NA::DNA.new( "atcg" )
- assert_equal( %w{ A U C G }, s.seq_alph( :RNA ) )
+ assert_equal( s1.guess_type.class, s2.class )
end
- def test_seq_alph_AA
- s = Seq.new
+ def test_guess_type_RNA_uppercase
+ s1 = Seq.new( "AUCG" )
+ s2 = Seq::NA::RNA.new( "AUCG" )
- assert_equal( %w{ F L S Y C W P H Q R I M T N K V A D E G }, s.seq_alph( :AA ) )
+ assert_equal( s1.guess_type.class, s2.class )
end
- def test_seq_alph_invalid
- s = Seq.new
+ def test_guess_type_RNA_lowercase
+ s1 = Seq.new( "aucg" )
+ s2 = Seq::NA::RNA.new( "aucg" )
- assert_raise( RuntimeError ) { s.seq_alph( "DNA" ) } # seq_alph wants a symbol
+ assert_equal( s1.guess_type.class, s2.class )
end
# Testing Seq#wrap
def test_wrap_arg_is_a_number
s = Seq.new
- assert_raise( RuntimeError ) { s.wrap( "FOO" ) }
+ assert_raise( ArgumentError ) { s.wrap( "FOO" ) }
end
def test_wrap_arg_is_a_positive_number
s = Seq.new
- assert_raise( RuntimeError ) { s.wrap( 0 ) }
- assert_raise( RuntimeError ) { s.wrap( -10 ) }
+ assert_raise( ArgumentError ) { s.wrap( 0 ) }
+ assert_raise( ArgumentError ) { s.wrap( -10 ) }
end
def test_wrap_with_0_args
def test_generate_arg_is_a_number
s = Seq.new
- assert_raise( RuntimeError ) { s.generate( "FOO" ) }
+ assert_raise( ArgumentError ) { s.generate( "FOO" ) }
end
def test_generate_arg_is_a_positive_number
s = Seq.new
- assert_raise( RuntimeError ) { s.generate( 0 ) }
- assert_raise( RuntimeError ) { s.generate( -10 ) }
+ assert_raise( ArgumentError ) { s.generate( 0 ) }
+ assert_raise( ArgumentError ) { s.generate( -10 ) }
end
def test_generate
- s = Seq.new( "", :AA )
+ s = Seq::AA.new
seq = s.generate( 40 )
end
def test_generate_dont_change_instance_var
- s = Seq.new( "", :AA )
+ s = Seq::AA.new
seq = s.generate( 40 )
# Testing Seq#generate!
def test_generate!
- s = Seq.new( "", :AA )
-
- gen_seq = s.generate!( 40 ).to_s
+ s = Seq::AA.new
- assert_equal( 40, gen_seq.length )
- assert_equal( 40, s.seq.length )
- assert_equal( gen_seq, s.to_s )
+ s.generate!( 40 )
+
+ assert_equal( 40, s.length )
end
- # Testing Seq::AA#initialize
+ # Testing Seq::AA#residues
- # test marked for deletion - too simple and not informative
- def test_Seq_AA_initialize_with_0_args
+ def test_Seq_AA_residues
s = Seq::AA.new
- assert_equal( "", s.to_s )
- assert_equal( :AA, s.seq_type )
- end
- # test marked for deletion - too simple and not informative
- def test_Seq_AA_initialize_with_1_args
- s = Seq::AA.new( "SEQ" )
- assert_equal( "SEQ", s.to_s )
- assert_equal( :AA, s.seq_type )
+ assert_equal( %w{ F L S Y C W P H Q R I M T N K V A D E G }, s.residues )
end
# Testing Seq::AA#mol_weight
- def test_Seq_aa_mol_wight_bad_residue
+ def test_Seq_aa_mol_weight_bad_residue
s = Seq::AA.new( "7" )
- assert_raise( RuntimeError ) { s.mol_weight }
+ assert_raise( ArgumentError ) { s.mol_weight }
end
- def test_Seq_aa_mol_wight_return_correct
+ def test_Seq_aa_mol_wight_return_correct_uppercase
s = Seq::AA.new( "SEQ" )
assert_equal( 398.0, s.mol_weight )
end
- # Testing Seq::NA::DNA#initialize
+ def test_Seq_aa_mol_wight_return_correct_lowercase
+ s = Seq::AA.new( "seq" )
+ assert_equal( 398.0, s.mol_weight )
+ end
- # test marked for deletion - too simple and not informative
- def test_Seq_NA_DNA_inialize_with_0_args
+ # Testing Seq::NA::DNA#residues
+
+ def test_Seq_NA_DNA_residues
s = Seq::NA::DNA.new
- assert_equal( "", s.to_s )
- assert_equal( :DNA, s.seq_type )
- end
- # test marked for deletion - too simple and not informative
- def test_Seq_NA_DNA_inialize_with_1_args
- s = Seq::NA::DNA.new( "ATCG" )
- assert_equal( "ATCG", s.to_s )
- assert_equal( :DNA, s.seq_type )
+ assert_equal( %w{ A T C G }, s.residues )
end
# Testing Seq::NA::DNA#complement
def test_Seq_NA_DNA_complement_correct
- s = Seq::NA::DNA.new( "ATCG" )
- assert_equal( "TAGC", s.complement.to_s )
+ s = Seq::NA::DNA.new( "ATCGatcg" )
+ assert_equal( "TAGCtagc", s.complement.to_s )
end
- # Testing Seq::NA::RNA#initialize
+ # Testing Seq::NA::DNA#to_RNA
- # test marked for deletion - too simple and not informative
- def test_Seq_NA_RNA_inialize_with_0_args
- s = Seq::NA::RNA.new
- assert_equal( "", s.to_s )
- assert_equal( :RNA, s.seq_type )
+ def test_Seq_NA_DNA_to_RNA_returns_RNA_object
+ dna = Seq::NA::DNA.new( "ATCGatcg" )
+ rna = Seq::NA::RNA.new
+
+ new_rna = dna.to_RNA
+
+ assert_equal( rna.class, new_rna.class )
end
- # test marked for deletion - too simple and not informative
- def test_Seq_NA_RNA_inialize_with_1_args
- s = Seq::NA::RNA.new( "AUCG" )
- assert_equal( "AUCG", s.to_s )
- assert_equal( :RNA, s.seq_type )
+ def test_Seq_NA_DNA_to_RNA_is_correct
+ dna = Seq::NA::DNA.new( "ATCGatcg" )
+ rna = dna.to_RNA
+
+ assert_equal( "AUCGaucg", rna.to_s )
+ end
+
+ # Testing Seq::NA::RNA#residues
+
+ def test_Seq_NA_RNA_residues
+ s = Seq::NA::RNA.new
+
+ assert_equal( %w{ A U C G }, s.residues )
end
# Testing Seq::NA::RNA#complement
- def test_Seq_NA_DNA_complement_correct
- s = Seq::NA::RNA.new( "AUCG" )
- assert_equal( "UAGC", s.complement.to_s )
+ def test_Seq_NA_RNA_complement_correct
+ s = Seq::NA::RNA.new( "AUCGaucg" )
+ assert_equal( "UAGCuagc", s.complement.to_s )
+ end
+
+ # Testing Seq::NA::RNA#to_DNA
+
+ def test_Seq_NA_RNA_to_DNA_returns_DNA_object
+ rna = Seq::NA::RNA.new( "AUCGaucg" )
+ dna = Seq::NA::DNA.new
+
+ new_dna = rna.to_DNA
+
+ assert_equal( dna.class, new_dna.class )
+ end
+
+ def test_Seq_NA_RNA_to_DNA_is_correct
+ rna = Seq::NA::RNA.new( "AUCGaucg" )
+ dna = rna.to_DNA
+
+ assert_equal( "ATCGatcg", dna.to_s )
end
end