From 6dff5c02794473604e0c6590e0739fba5ad87cd2 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Sat, 4 Jul 2009 14:02:37 +0000 Subject: [PATCH] updated seq.rb git-svn-id: http://biopieces.googlecode.com/svn/trunk@554 74ccb610-7750-0410-82ae-013aeee3265d --- code_ruby/Maasha/lib/seq.rb | 105 +++++-------- code_ruby/Maasha/test/test_seq.rb | 248 ++++++++++++------------------ 2 files changed, 142 insertions(+), 211 deletions(-) diff --git a/code_ruby/Maasha/lib/seq.rb b/code_ruby/Maasha/lib/seq.rb index 8146ab4..26bfee3 100644 --- a/code_ruby/Maasha/lib/seq.rb +++ b/code_ruby/Maasha/lib/seq.rb @@ -1,81 +1,53 @@ # Class containing generic sequence methods and nucleic acid and amino acid subclasses. class Seq < String - attr_accessor :seq, :seq_type - - # Method to initialize a new sequence. - def initialize( seq = "", seq_type = nil ) - @seq = seq - @seq_type = seq_type - end - - # Method for outputting sequence as a String. - def to_s - @seq - end - # Guess the sequence type by analyzing the first 100 residues allowing for ambiguity codes. - def seq_type_guess - seq_beg = @seq[ 0, 100 ].upcase + def guess_type + raise ArgumentError, "No sequence." if self.empty? + + seq_beg = self[ 0, 100 ].upcase if seq_beg.count( "FLPQIE" ) > 0 - :AA + Seq::AA.new( self ) elsif seq_beg.count( "U" ) > 0 - :RNA + Seq::NA::RNA.new( self ) else - :DNA + Seq::NA::DNA.new( self ) end end - # Guess and replace the sequence type by analyzing the first 100 residues allowing for ambiguity codes. - def seq_type_guess! - @seq_type = seq_type_guess - end - - # Method that return an array of the residue alphabet for a given sequence type. - def seq_alph( seq_type ) - hash = { - :DNA => %w{ A T C G }, - :RNA => %w{ A U C G }, - :AA => %w{ F L S Y C W P H Q R I M T N K V A D E G }, - } - - raise "ERROR: Sequence type '#{ seq_type }' not recognized." unless hash.include?( seq_type ) - return hash[ seq_type ] - end - # Method to wrap a sequence to a given width using a given delimiter. def wrap( width = 80, delimit = "\n" ) - raise "ERROR: Wrap width must be an integer." unless width.is_a? Fixnum - raise "ERROR: Cannot wrap sequence to negative width: #{ width }." if width <= 0 - @seq.tr!( " \t\n\r", '' ) - @seq.gsub( /.{#{ width }}/, "\\0#{ delimit }" ).sub( /#{ delimit }$/, "" ) + raise ArgumentError, "Wrap width must be an integer." unless width.is_a? Fixnum + raise ArgumentError, "Cannot wrap sequence to negative width: #{ width }." if width <= 0 + + self.delete!( " \t\n\r" ) + self.gsub( /.{#{ width }}(?!$)/, "\\0#{ delimit }" ) end # Method to wrap and replace a sequence to a given width using a given delimiter. def wrap!( width = 80, delimit = "\n" ) - @seq = wrap( width, delimit ) + self.replace( self.wrap( width, delimit ) ) end # Method that generates a random sequence of a given length. def generate( length ) - raise "ERROR: Length must be an integer." unless length.is_a? Fixnum - raise "ERROR: Cannot generate negative sequence length: #{ length }." if length <= 0 + raise ArgumentError, "Length must be an integer." unless length.is_a? Fixnum + raise ArgumentError, "Cannot generate negative sequence length: #{ length }." if length <= 0 - alph = seq_alph( @seq_type ) - seq = Array.new( length ) { alph[ rand( alph.size ) ] }.join + alph = self.residues + Array.new( length ) { alph[ rand( alph.size ) ] }.join( "" ) end # Method that replaces sequence with a randomly generated sequence of a given length. def generate!( length ) - @seq = generate( length ) + self.replace( self.generate( length ) ) end # Class containing methods specific for amino acid (AA) sequences. class AA < Seq - # Method to initialize a new amino acid sequence. - def initialize( seq = "" ) - @seq = seq - @seq_type = :AA + # Method that returns an array of amino acid residues. + def residues + %w{ F L S Y C W P H Q R I M T N K V A D E G } end # Calculate the molecular weight of an amino acid seuqunce. @@ -83,6 +55,8 @@ class Seq < String # for amino bond formation and the MW used are somewhat imprecise: # http://www.expasy.ch/tools/pscale/Molecularweight.html def mol_weight + raise ArgumentError, "invalid residues found: #{self.delete("#{residues.join( "" )}")}" if self.upcase =~ /[^#{residues.join( "" )}]/ + mol_weight_aa = { "A" => 89.000, # Ala "R" => 174.000, # Arg @@ -108,10 +82,7 @@ class Seq < String mw = 0.0 - @seq.upcase.each_char do |c| - raise "ERROR: Unknown amino acid: #{ c }" unless mol_weight_aa.include?( c ) - mw += mol_weight_aa[ c ] - end + self.upcase.each_char { |c| mw += mol_weight_aa[ c ] } mw end @@ -121,29 +92,37 @@ class Seq < String class NA < Seq # Class containing methods specific for DNA sequences. class DNA < NA - # Method to initialize a new DNA sequence. - def initialize( seq = "" ) - @seq = seq - @seq_type = :DNA + # Method that returns an array of DNA residues. + def residues + %w{ A T C G } + end + + # Method to transcribe DNA to RNA. + def to_RNA + Seq::NA::RNA.new( self.tr( 'Tt', 'Uu' ) ) end # Method that complements DNA sequence including ambiguity codes. def complement - @seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn' ) + self.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn' ) end end # Class containing methods specific for RNA sequences. class RNA < NA - # Method to initialize a new RNA sequence. - def initialize( seq = "" ) - @seq = seq - @seq_type = :RNA + # Method that returns an array of RNA residues. + def residues + %w{ A U C G } + end + + # Method to reverse transcribe RNA to DNA. + def to_DNA + Seq::NA::DNA.new( self.tr( 'Uu', 'Tt' ) ) end # Method that complements RNA sequence including ambiguity codes. def complement - @seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn' ) + self.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn' ) end end end diff --git a/code_ruby/Maasha/test/test_seq.rb b/code_ruby/Maasha/test/test_seq.rb index e6e28a2..ce6f466 100755 --- a/code_ruby/Maasha/test/test_seq.rb +++ b/code_ruby/Maasha/test/test_seq.rb @@ -10,132 +10,69 @@ class TestSeq < Test::Unit::TestCase # test marked for deletion - too simple and not informative def test_Seq_initialize_with_0_args s = Seq.new - assert_equal( "", s.seq ) - assert_equal( nil, s.seq_type ) + assert_equal( "", s.to_s ) end # test marked for deletion - too simple and not informative def test_Seq_initialize_with_1_args s = Seq.new( "ATCG" ) - assert_equal( "ATCG", s.seq ) - assert_equal( nil, s.seq_type ) + assert_equal( "ATCG", s.to_s ) end - # test marked for deletion - too simple and informative - def test_Seq_initialize_with_2_args - s = Seq.new( "ATCG", :DNA ) - assert_equal( "ATCG", s.seq ) - assert_equal( :DNA, s.seq_type ) - end - # Testing Seq#to_s def test_to_s s = Seq.new( "ATCG" ) assert_equal( "ATCG", s.to_s ) end - # Testing Seq#seq_type_guess - - def test_seq_type_guess_DNA_uppercase - s = Seq.new( "ATCG" ) - - assert_equal( :DNA, s.seq_type_guess ) - end + # Testing Seq#guess_type - def test_seq_type_guess_DNA_lowercase - s = Seq.new( "atcg" ) - - assert_equal( :DNA, s.seq_type_guess ) - end - - def test_seq_type_guess_RNA_uppercase - s = Seq.new( "AUCG" ) - assert_equal( :RNA, s.seq_type_guess ) - end - - def test_seq_type_guess_RNA_lowercase - s = Seq.new( "aucg" ) - assert_equal( :RNA, s.seq_type_guess ) - end - - def test_seq_type_guess_AA_uppercase - s = Seq.new( "SEQ" ) - assert_equal( :AA, s.seq_type_guess ) - end - - def test_seq_type_guess_AA_lowercase - s = Seq.new( "seq" ) - assert_equal( :AA, s.seq_type_guess ) - end - - def test_seq_type_guess_dont_change_instance_var - s = Seq.new( "seq" ) - s.seq_type_guess - assert_equal( nil, s.seq_type ) - end - - # Testing Seq#seq_type_guess! - - def test_seq_type_guess_DNA_uppercase! - s = Seq.new( "ATCG" ) - s.seq_type_guess! - assert_equal( :DNA, s.seq_type ) - end + def test_guess_type_raise_if_no_sequence + s = Seq.new - def test_seq_type_guess_DNA_lowercase! - s = Seq.new( "atcg" ) - s.seq_type_guess! - assert_equal( :DNA, s.seq_type ) + assert_raise( ArgumentError ) { s.guess_type } end - def test_seq_type_guess_RNA_uppercase! - s = Seq.new( "AUCG" ) - s.seq_type_guess! - assert_equal( :RNA, s.seq_type ) - end + def test_guess_type_AA_uppercase + s1 = Seq.new( "SEQ" ) + s2 = Seq::AA.new( "SEQ" ) - def test_seq_type_guess_RNA_lowercase! - s = Seq.new( "aucg" ) - s.seq_type_guess! - assert_equal( :RNA, s.seq_type ) + assert_equal( s1.guess_type.class, s2.class ) end - def test_seq_type_guess_AA_uppercase! - s = Seq.new( "SEQ" ) - s.seq_type_guess! - assert_equal( :AA, s.seq_type ) - end + def test_guess_type_AA_lowercase + s1 = Seq.new( "seq" ) + s2 = Seq::AA.new( "seq" ) - def test_seq_type_guess_AA_lowercase! - s = Seq.new( "seq" ) - s.seq_type_guess! - assert_equal( :AA, s.seq_type ) + assert_equal( s1.guess_type.class, s2.class ) end - # Testing Seq#seq_alph - - def test_seq_alph_DNA - s = Seq.new + def test_guess_type_DNA_uppercase + s1 = Seq.new( "ATCG" ) + s2 = Seq::NA::DNA.new( "ATCG" ) - assert_equal( %w{ A T C G }, s.seq_alph( :DNA ) ) + assert_equal( s1.guess_type.class, s2.class ) end - def test_seq_alph_RNA - s = Seq.new + def test_guess_type_DNA_lowercase + s1 = Seq.new( "atcg" ) + s2 = Seq::NA::DNA.new( "atcg" ) - assert_equal( %w{ A U C G }, s.seq_alph( :RNA ) ) + assert_equal( s1.guess_type.class, s2.class ) end - def test_seq_alph_AA - s = Seq.new + def test_guess_type_RNA_uppercase + s1 = Seq.new( "AUCG" ) + s2 = Seq::NA::RNA.new( "AUCG" ) - assert_equal( %w{ F L S Y C W P H Q R I M T N K V A D E G }, s.seq_alph( :AA ) ) + assert_equal( s1.guess_type.class, s2.class ) end - def test_seq_alph_invalid - s = Seq.new + def test_guess_type_RNA_lowercase + s1 = Seq.new( "aucg" ) + s2 = Seq::NA::RNA.new( "aucg" ) - assert_raise( RuntimeError ) { s.seq_alph( "DNA" ) } # seq_alph wants a symbol + assert_equal( s1.guess_type.class, s2.class ) end # Testing Seq#wrap @@ -143,14 +80,14 @@ class TestSeq < Test::Unit::TestCase def test_wrap_arg_is_a_number s = Seq.new - assert_raise( RuntimeError ) { s.wrap( "FOO" ) } + assert_raise( ArgumentError ) { s.wrap( "FOO" ) } end def test_wrap_arg_is_a_positive_number s = Seq.new - assert_raise( RuntimeError ) { s.wrap( 0 ) } - assert_raise( RuntimeError ) { s.wrap( -10 ) } + assert_raise( ArgumentError ) { s.wrap( 0 ) } + assert_raise( ArgumentError ) { s.wrap( -10 ) } end def test_wrap_with_0_args @@ -210,18 +147,18 @@ class TestSeq < Test::Unit::TestCase def test_generate_arg_is_a_number s = Seq.new - assert_raise( RuntimeError ) { s.generate( "FOO" ) } + assert_raise( ArgumentError ) { s.generate( "FOO" ) } end def test_generate_arg_is_a_positive_number s = Seq.new - assert_raise( RuntimeError ) { s.generate( 0 ) } - assert_raise( RuntimeError ) { s.generate( -10 ) } + assert_raise( ArgumentError ) { s.generate( 0 ) } + assert_raise( ArgumentError ) { s.generate( -10 ) } end def test_generate - s = Seq.new( "", :AA ) + s = Seq::AA.new seq = s.generate( 40 ) @@ -229,7 +166,7 @@ class TestSeq < Test::Unit::TestCase end def test_generate_dont_change_instance_var - s = Seq.new( "", :AA ) + s = Seq::AA.new seq = s.generate( 40 ) @@ -239,87 +176,102 @@ class TestSeq < Test::Unit::TestCase # Testing Seq#generate! def test_generate! - s = Seq.new( "", :AA ) - - gen_seq = s.generate!( 40 ).to_s + s = Seq::AA.new - assert_equal( 40, gen_seq.length ) - assert_equal( 40, s.seq.length ) - assert_equal( gen_seq, s.to_s ) + s.generate!( 40 ) + + assert_equal( 40, s.length ) end - # Testing Seq::AA#initialize + # Testing Seq::AA#residues - # test marked for deletion - too simple and not informative - def test_Seq_AA_initialize_with_0_args + def test_Seq_AA_residues s = Seq::AA.new - assert_equal( "", s.to_s ) - assert_equal( :AA, s.seq_type ) - end - # test marked for deletion - too simple and not informative - def test_Seq_AA_initialize_with_1_args - s = Seq::AA.new( "SEQ" ) - assert_equal( "SEQ", s.to_s ) - assert_equal( :AA, s.seq_type ) + assert_equal( %w{ F L S Y C W P H Q R I M T N K V A D E G }, s.residues ) end # Testing Seq::AA#mol_weight - def test_Seq_aa_mol_wight_bad_residue + def test_Seq_aa_mol_weight_bad_residue s = Seq::AA.new( "7" ) - assert_raise( RuntimeError ) { s.mol_weight } + assert_raise( ArgumentError ) { s.mol_weight } end - def test_Seq_aa_mol_wight_return_correct + def test_Seq_aa_mol_wight_return_correct_uppercase s = Seq::AA.new( "SEQ" ) assert_equal( 398.0, s.mol_weight ) end - # Testing Seq::NA::DNA#initialize + def test_Seq_aa_mol_wight_return_correct_lowercase + s = Seq::AA.new( "seq" ) + assert_equal( 398.0, s.mol_weight ) + end - # test marked for deletion - too simple and not informative - def test_Seq_NA_DNA_inialize_with_0_args + # Testing Seq::NA::DNA#residues + + def test_Seq_NA_DNA_residues s = Seq::NA::DNA.new - assert_equal( "", s.to_s ) - assert_equal( :DNA, s.seq_type ) - end - # test marked for deletion - too simple and not informative - def test_Seq_NA_DNA_inialize_with_1_args - s = Seq::NA::DNA.new( "ATCG" ) - assert_equal( "ATCG", s.to_s ) - assert_equal( :DNA, s.seq_type ) + assert_equal( %w{ A T C G }, s.residues ) end # Testing Seq::NA::DNA#complement def test_Seq_NA_DNA_complement_correct - s = Seq::NA::DNA.new( "ATCG" ) - assert_equal( "TAGC", s.complement.to_s ) + s = Seq::NA::DNA.new( "ATCGatcg" ) + assert_equal( "TAGCtagc", s.complement.to_s ) end - # Testing Seq::NA::RNA#initialize + # Testing Seq::NA::DNA#to_RNA - # test marked for deletion - too simple and not informative - def test_Seq_NA_RNA_inialize_with_0_args - s = Seq::NA::RNA.new - assert_equal( "", s.to_s ) - assert_equal( :RNA, s.seq_type ) + def test_Seq_NA_DNA_to_RNA_returns_RNA_object + dna = Seq::NA::DNA.new( "ATCGatcg" ) + rna = Seq::NA::RNA.new + + new_rna = dna.to_RNA + + assert_equal( rna.class, new_rna.class ) end - # test marked for deletion - too simple and not informative - def test_Seq_NA_RNA_inialize_with_1_args - s = Seq::NA::RNA.new( "AUCG" ) - assert_equal( "AUCG", s.to_s ) - assert_equal( :RNA, s.seq_type ) + def test_Seq_NA_DNA_to_RNA_is_correct + dna = Seq::NA::DNA.new( "ATCGatcg" ) + rna = dna.to_RNA + + assert_equal( "AUCGaucg", rna.to_s ) + end + + # Testing Seq::NA::RNA#residues + + def test_Seq_NA_RNA_residues + s = Seq::NA::RNA.new + + assert_equal( %w{ A U C G }, s.residues ) end # Testing Seq::NA::RNA#complement - def test_Seq_NA_DNA_complement_correct - s = Seq::NA::RNA.new( "AUCG" ) - assert_equal( "UAGC", s.complement.to_s ) + def test_Seq_NA_RNA_complement_correct + s = Seq::NA::RNA.new( "AUCGaucg" ) + assert_equal( "UAGCuagc", s.complement.to_s ) + end + + # Testing Seq::NA::RNA#to_DNA + + def test_Seq_NA_RNA_to_DNA_returns_DNA_object + rna = Seq::NA::RNA.new( "AUCGaucg" ) + dna = Seq::NA::DNA.new + + new_dna = rna.to_DNA + + assert_equal( dna.class, new_dna.class ) + end + + def test_Seq_NA_RNA_to_DNA_is_correct + rna = Seq::NA::RNA.new( "AUCGaucg" ) + dna = rna.to_DNA + + assert_equal( "ATCGatcg", dna.to_s ) end end -- 2.39.5