git.donarmstrong.com Git - biopieces.git/commitdiff
updated seq.rb
author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Sat, 4 Jul 2009 14:02:37 +0000 (14:02 +0000)
committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Sat, 4 Jul 2009 14:02:37 +0000 (14:02 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@554 74ccb610-7750-0410-82ae-013aeee3265d

code_ruby/Maasha/lib/seq.rb
code_ruby/Maasha/test/test_seq.rb

index 8146ab4fc3c9409ac9274515330fd6b042f2228f..26bfee3f7c9e04faeb301b3d211ebbd8d8db35a9 100644 (file)
@@ -1,81 +1,53 @@
 # Class containing generic sequence methods and nucleic acid and amino acid subclasses.
 class Seq < String
-       attr_accessor :seq, :seq_type
-
-       # Method to initialize a new sequence.
-       def initialize( seq = "", seq_type = nil )
-               @seq      = seq
-               @seq_type = seq_type
-       end
-
-       # Method for outputting sequence as a String.
-       def to_s
-               @seq
-       end
-
        # Guess the sequence type by analyzing the first 100 residues allowing for ambiguity codes.
-       def seq_type_guess
-               seq_beg = @seq[ 0, 100 ].upcase
+       def guess_type
+               raise ArgumentError, "No sequence." if self.empty?
+
+               seq_beg = self[ 0, 100 ].upcase
 
                if seq_beg.count( "FLPQIE" ) > 0
-                       :AA
+                       Seq::AA.new( self )
                elsif seq_beg.count( "U" ) > 0
-                       :RNA
+                       Seq::NA::RNA.new( self )
                else
-                       :DNA
+                       Seq::NA::DNA.new( self )
                end
        end
 
-       # Guess and replace the sequence type by analyzing the first 100 residues allowing for ambiguity codes.
-       def seq_type_guess!
-               @seq_type = seq_type_guess
-       end
-
-       # Method that return an array of the residue alphabet for a given sequence type.
-       def seq_alph( seq_type )
-               hash = {
-                       :DNA => %w{ A T C G },
-                       :RNA => %w{ A U C G },
-                       :AA  => %w{ F L S Y C W P H Q R I M T N K V A D E G },
-               }
-
-               raise "ERROR: Sequence type '#{ seq_type }' not recognized." unless hash.include?( seq_type )
-               return hash[ seq_type ]
-       end
-
        # Method to wrap a sequence to a given width using a given delimiter.
        def wrap( width = 80, delimit = "\n" )
-               raise "ERROR: Wrap width must be an integer." unless width.is_a? Fixnum
-               raise "ERROR: Cannot wrap sequence to negative width: #{ width }." if width <= 0
-               @seq.tr!( " \t\n\r", '' )
-               @seq.gsub( /.{#{ width }}/, "\\0#{ delimit }" ).sub( /#{ delimit }$/, "" )
+               raise ArgumentError, "Wrap width must be an integer." unless width.is_a? Fixnum
+               raise ArgumentError, "Cannot wrap sequence to negative width: #{ width }." if width <= 0
+
+               self.delete!( " \t\n\r" )
+               self.gsub( /.{#{ width }}(?!$)/, "\\0#{ delimit }" )
        end
 
        # Method to wrap and replace a sequence to a given width using a given delimiter.
        def wrap!( width = 80, delimit = "\n" )
-               @seq = wrap( width, delimit )
+               self.replace( self.wrap( width, delimit ) )
        end
 
        # Method that generates a random sequence of a given length.
        def generate( length )
-               raise "ERROR: Length must be an integer." unless length.is_a? Fixnum
-               raise "ERROR: Cannot generate negative sequence length: #{ length }." if length <= 0
+               raise ArgumentError, "Length must be an integer." unless length.is_a? Fixnum
+               raise ArgumentError, "Cannot generate negative sequence length: #{ length }." if length <= 0
 
-               alph = seq_alph( @seq_type )
-               seq  = Array.new( length ) { alph[ rand( alph.size ) ] }.join
+               alph = self.residues
+               Array.new( length ) { alph[ rand( alph.size ) ] }.join( "" )
        end
 
        # Method that replaces sequence with a randomly generated sequence of a given length.
        def generate!( length )
-               @seq = generate( length )
+               self.replace( self.generate( length ) )
        end
 
        # Class containing methods specific for amino acid (AA) sequences.
        class AA < Seq
-               # Method to initialize a new amino acid sequence.
-               def initialize( seq = "" )
-                       @seq      = seq
-                       @seq_type = :AA
+               # Method that returns an array of amino acid residues.
+               def residues
+                       %w{ F L S Y C W P H Q R I M T N K V A D E G }
                end
 
                # Calculate the molecular weight of an amino acid seuqunce.
@@ -83,6 +55,8 @@ class Seq < String
                # for amino bond formation and the MW used are somewhat imprecise:
                # http://www.expasy.ch/tools/pscale/Molecularweight.html
                def mol_weight
+                       raise ArgumentError, "invalid residues found: #{self.delete("#{residues.join( "" )}")}" if self.upcase =~ /[^#{residues.join( "" )}]/
+
                        mol_weight_aa = {
                                "A" => 89.000,    # Ala
                                "R" => 174.000,   # Arg
@@ -108,10 +82,7 @@ class Seq < String
 
                        mw = 0.0
 
-                       @seq.upcase.each_char do |c|
-                               raise "ERROR: Unknown amino acid: #{ c }" unless mol_weight_aa.include?( c )
-                               mw += mol_weight_aa[ c ]
-                       end
+                       self.upcase.each_char { |c| mw += mol_weight_aa[ c ] }
 
                        mw
                end
@@ -121,29 +92,37 @@ class Seq < String
        class NA < Seq
                # Class containing methods specific for DNA sequences.
                class DNA < NA
-                       # Method to initialize a new DNA sequence.
-                       def initialize( seq = "" )
-                               @seq      = seq
-                               @seq_type = :DNA
+                       # Method that returns an array of DNA residues.
+                       def residues
+                               %w{ A T C G }
+                       end
+
+                       # Method to transcribe DNA to RNA.
+                       def to_RNA
+                               Seq::NA::RNA.new( self.tr( 'Tt', 'Uu' ) )
                        end
 
                        # Method that complements DNA sequence including ambiguity codes.
                        def complement
-                               @seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn' )
+                               self.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn' )
                        end
                end                     
 
                # Class containing methods specific for RNA sequences.
                class RNA < NA
-                       # Method to initialize a new RNA sequence.
-                       def initialize( seq = "" )
-                               @seq      = seq
-                               @seq_type = :RNA
+                       # Method that returns an array of RNA residues.
+                       def residues
+                               %w{ A U C G }
+                       end
+
+                       # Method to reverse transcribe RNA to DNA.
+                       def to_DNA
+                               Seq::NA::DNA.new( self.tr( 'Uu', 'Tt' ) )
                        end
 
                        # Method that complements RNA sequence including ambiguity codes.
                        def complement
-                               @seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn' )
+                               self.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn' )
                        end
                end
        end
index e6e28a2e8c985b058e0aa1b36e1aa046bb89bea2..ce6f4664753bb16012eb972b2ab12739d466d008 100755 (executable)
@@ -10,132 +10,69 @@ class TestSeq < Test::Unit::TestCase
        # test marked for deletion - too simple and not informative
        def test_Seq_initialize_with_0_args
                s = Seq.new
-               assert_equal( "", s.seq ) 
-               assert_equal( nil, s.seq_type ) 
+               assert_equal( "", s.to_s ) 
        end
 
        # test marked for deletion - too simple and not informative
        def test_Seq_initialize_with_1_args
                s = Seq.new( "ATCG" )
-               assert_equal( "ATCG", s.seq ) 
-               assert_equal( nil,    s.seq_type )
+               assert_equal( "ATCG", s.to_s ) 
        end
 
-       # test marked for deletion - too simple and informative
-       def test_Seq_initialize_with_2_args
-               s = Seq.new( "ATCG", :DNA )
-               assert_equal( "ATCG", s.seq ) 
-               assert_equal( :DNA, s.seq_type ) 
-       end 
-
        # Testing Seq#to_s
        def test_to_s
                s = Seq.new( "ATCG" )
                assert_equal( "ATCG", s.to_s )
        end
 
-       # Testing Seq#seq_type_guess
-
-       def test_seq_type_guess_DNA_uppercase
-               s = Seq.new( "ATCG" )
-
-               assert_equal( :DNA, s.seq_type_guess )
-       end
+       # Testing Seq#guess_type
 
-       def test_seq_type_guess_DNA_lowercase
-               s = Seq.new( "atcg" )
-
-               assert_equal( :DNA, s.seq_type_guess )
-       end
-
-       def test_seq_type_guess_RNA_uppercase
-               s = Seq.new( "AUCG" )
-               assert_equal( :RNA, s.seq_type_guess )
-       end
-
-       def test_seq_type_guess_RNA_lowercase
-               s = Seq.new( "aucg" )
-               assert_equal( :RNA, s.seq_type_guess )
-       end
-
-       def test_seq_type_guess_AA_uppercase
-               s = Seq.new( "SEQ" )
-               assert_equal( :AA, s.seq_type_guess )
-       end
-
-       def test_seq_type_guess_AA_lowercase
-               s = Seq.new( "seq" )
-               assert_equal( :AA, s.seq_type_guess )
-       end
-
-       def test_seq_type_guess_dont_change_instance_var
-               s = Seq.new( "seq" )
-               s.seq_type_guess
-               assert_equal( nil, s.seq_type )
-       end
-
-       # Testing Seq#seq_type_guess!
-
-       def test_seq_type_guess_DNA_uppercase!
-               s = Seq.new( "ATCG" )
-               s.seq_type_guess!
-               assert_equal( :DNA, s.seq_type )
-       end
+       def test_guess_type_raise_if_no_sequence
+               s = Seq.new
 
-       def test_seq_type_guess_DNA_lowercase!
-               s = Seq.new( "atcg" )
-               s.seq_type_guess!
-               assert_equal( :DNA, s.seq_type )
+               assert_raise( ArgumentError ) { s.guess_type }
        end
 
-       def test_seq_type_guess_RNA_uppercase!
-               s = Seq.new( "AUCG" )
-               s.seq_type_guess!
-               assert_equal( :RNA, s.seq_type )
-       end
+       def test_guess_type_AA_uppercase
+               s1 = Seq.new( "SEQ" )
+               s2 = Seq::AA.new( "SEQ" )
 
-       def test_seq_type_guess_RNA_lowercase!
-               s = Seq.new( "aucg" )
-               s.seq_type_guess!
-               assert_equal( :RNA, s.seq_type )
+               assert_equal( s1.guess_type.class, s2.class )
        end
 
-       def test_seq_type_guess_AA_uppercase!
-               s = Seq.new( "SEQ" )
-               s.seq_type_guess!
-               assert_equal( :AA, s.seq_type )
-       end
+       def test_guess_type_AA_lowercase
+               s1 = Seq.new( "seq" )
+               s2 = Seq::AA.new( "seq" )
 
-       def test_seq_type_guess_AA_lowercase!
-               s = Seq.new( "seq" )
-               s.seq_type_guess!
-               assert_equal( :AA, s.seq_type )
+               assert_equal( s1.guess_type.class, s2.class )
        end
 
-       # Testing Seq#seq_alph
-
-       def test_seq_alph_DNA
-               s = Seq.new
+       def test_guess_type_DNA_uppercase
+               s1 = Seq.new( "ATCG" )
+               s2 = Seq::NA::DNA.new( "ATCG" )
 
-               assert_equal( %w{ A T C G }, s.seq_alph( :DNA ) )
+               assert_equal( s1.guess_type.class, s2.class )
        end
 
-       def test_seq_alph_RNA
-               s = Seq.new
+       def test_guess_type_DNA_lowercase
+               s1 = Seq.new( "atcg" )
+               s2 = Seq::NA::DNA.new( "atcg" )
 
-               assert_equal( %w{ A U C G }, s.seq_alph( :RNA ) )
+               assert_equal( s1.guess_type.class, s2.class )
        end
 
-       def test_seq_alph_AA
-               s = Seq.new
+       def test_guess_type_RNA_uppercase
+               s1 = Seq.new( "AUCG" )
+               s2 = Seq::NA::RNA.new( "AUCG" )
 
-               assert_equal( %w{ F L S Y C W P H Q R I M T N K V A D E G }, s.seq_alph( :AA ) )
+               assert_equal( s1.guess_type.class, s2.class )
        end
 
-       def test_seq_alph_invalid
-               s = Seq.new
+       def test_guess_type_RNA_lowercase
+               s1 = Seq.new( "aucg" )
+               s2 = Seq::NA::RNA.new( "aucg" )
 
-               assert_raise( RuntimeError ) { s.seq_alph( "DNA" ) } # seq_alph wants a symbol
+               assert_equal( s1.guess_type.class, s2.class )
        end
 
        # Testing Seq#wrap
@@ -143,14 +80,14 @@ class TestSeq < Test::Unit::TestCase
        def test_wrap_arg_is_a_number
                s = Seq.new
 
-               assert_raise( RuntimeError ) { s.wrap( "FOO" ) }
+               assert_raise( ArgumentError ) { s.wrap( "FOO" ) }
        end
 
        def test_wrap_arg_is_a_positive_number
                s = Seq.new
 
-               assert_raise( RuntimeError ) { s.wrap( 0 ) }
-               assert_raise( RuntimeError ) { s.wrap( -10 ) }
+               assert_raise( ArgumentError ) { s.wrap( 0 ) }
+               assert_raise( ArgumentError ) { s.wrap( -10 ) }
        end
 
        def test_wrap_with_0_args
@@ -210,18 +147,18 @@ class TestSeq < Test::Unit::TestCase
        def test_generate_arg_is_a_number
                s = Seq.new
 
-               assert_raise( RuntimeError ) { s.generate( "FOO" ) }
+               assert_raise( ArgumentError ) { s.generate( "FOO" ) }
        end
 
        def test_generate_arg_is_a_positive_number
                s = Seq.new
 
-               assert_raise( RuntimeError ) { s.generate( 0 ) }
-               assert_raise( RuntimeError ) { s.generate( -10 ) }
+               assert_raise( ArgumentError ) { s.generate( 0 ) }
+               assert_raise( ArgumentError ) { s.generate( -10 ) }
        end
 
        def test_generate
-               s = Seq.new( "", :AA )
+               s = Seq::AA.new
 
                seq = s.generate( 40 )
 
@@ -229,7 +166,7 @@ class TestSeq < Test::Unit::TestCase
        end
 
        def test_generate_dont_change_instance_var
-               s = Seq.new( "", :AA )
+               s = Seq::AA.new
 
                seq = s.generate( 40 )
 
@@ -239,87 +176,102 @@ class TestSeq < Test::Unit::TestCase
        # Testing Seq#generate!
 
        def test_generate!
-               s = Seq.new( "", :AA )
-
-               gen_seq = s.generate!( 40 ).to_s
+               s = Seq::AA.new
 
-               assert_equal( 40, gen_seq.length )
-               assert_equal( 40, s.seq.length )
-               assert_equal( gen_seq, s.to_s )
+               s.generate!( 40 )
+       
+               assert_equal( 40, s.length )
        end
 
-       # Testing Seq::AA#initialize
+       # Testing Seq::AA#residues
 
-       # test marked for deletion - too simple and not informative
-       def test_Seq_AA_initialize_with_0_args
+       def test_Seq_AA_residues
                s = Seq::AA.new
-               assert_equal( "", s.to_s )
-               assert_equal( :AA, s.seq_type )
-       end
 
-       # test marked for deletion - too simple and not informative
-       def test_Seq_AA_initialize_with_1_args
-               s = Seq::AA.new( "SEQ" )
-               assert_equal( "SEQ", s.to_s )
-               assert_equal( :AA,  s.seq_type )
+               assert_equal( %w{ F L S Y C W P H Q R I M T N K V A D E G }, s.residues )
        end
 
        # Testing Seq::AA#mol_weight
 
-       def test_Seq_aa_mol_wight_bad_residue
+       def test_Seq_aa_mol_weight_bad_residue
                s = Seq::AA.new( "7" )
-               assert_raise( RuntimeError ) { s.mol_weight }
+               assert_raise( ArgumentError ) { s.mol_weight }
        end
 
-       def test_Seq_aa_mol_wight_return_correct
+       def test_Seq_aa_mol_wight_return_correct_uppercase
                s = Seq::AA.new( "SEQ" )
                assert_equal( 398.0, s.mol_weight )
        end
 
-       # Testing Seq::NA::DNA#initialize
+       def test_Seq_aa_mol_wight_return_correct_lowercase
+               s = Seq::AA.new( "seq" )
+               assert_equal( 398.0, s.mol_weight )
+       end
 
-       # test marked for deletion - too simple and not informative
-       def test_Seq_NA_DNA_inialize_with_0_args
+       # Testing Seq::NA::DNA#residues
+
+       def test_Seq_NA_DNA_residues
                s = Seq::NA::DNA.new
-               assert_equal( "", s.to_s )
-               assert_equal( :DNA, s.seq_type )
-       end
 
-       # test marked for deletion - too simple and not informative
-       def test_Seq_NA_DNA_inialize_with_1_args
-               s = Seq::NA::DNA.new( "ATCG" )
-               assert_equal( "ATCG", s.to_s )
-               assert_equal( :DNA, s.seq_type )
+               assert_equal( %w{ A T C G }, s.residues )
        end
 
        # Testing Seq::NA::DNA#complement
 
        def test_Seq_NA_DNA_complement_correct
-               s = Seq::NA::DNA.new( "ATCG" )
-               assert_equal( "TAGC", s.complement.to_s )
+               s = Seq::NA::DNA.new( "ATCGatcg" )
+               assert_equal( "TAGCtagc", s.complement.to_s )
        end
 
-       # Testing Seq::NA::RNA#initialize
+       # Testing Seq::NA::DNA#to_RNA
 
-       # test marked for deletion - too simple and not informative
-       def test_Seq_NA_RNA_inialize_with_0_args
-               s = Seq::NA::RNA.new
-               assert_equal( "", s.to_s )
-               assert_equal( :RNA, s.seq_type )
+       def test_Seq_NA_DNA_to_RNA_returns_RNA_object
+               dna = Seq::NA::DNA.new( "ATCGatcg" )
+               rna = Seq::NA::RNA.new
+               
+               new_rna = dna.to_RNA
+
+               assert_equal( rna.class, new_rna.class )
        end
 
-       # test marked for deletion - too simple and not informative
-       def test_Seq_NA_RNA_inialize_with_1_args
-               s = Seq::NA::RNA.new( "AUCG" )
-               assert_equal( "AUCG", s.to_s )
-               assert_equal( :RNA, s.seq_type )
+       def test_Seq_NA_DNA_to_RNA_is_correct
+               dna = Seq::NA::DNA.new( "ATCGatcg" )
+               rna = dna.to_RNA
+
+               assert_equal( "AUCGaucg", rna.to_s )
+       end
+
+       # Testing Seq::NA::RNA#residues
+
+       def test_Seq_NA_RNA_residues
+               s = Seq::NA::RNA.new
+
+               assert_equal( %w{ A U C G }, s.residues )
        end
 
        # Testing Seq::NA::RNA#complement
 
-       def test_Seq_NA_DNA_complement_correct
-               s = Seq::NA::RNA.new( "AUCG" )
-               assert_equal( "UAGC", s.complement.to_s )
+       def test_Seq_NA_RNA_complement_correct
+               s = Seq::NA::RNA.new( "AUCGaucg" )
+               assert_equal( "UAGCuagc", s.complement.to_s )
+       end
+
+       # Testing Seq::NA::RNA#to_DNA
+
+       def test_Seq_NA_RNA_to_DNA_returns_DNA_object
+               rna = Seq::NA::RNA.new( "AUCGaucg" )
+               dna = Seq::NA::DNA.new
+               
+               new_dna = rna.to_DNA
+
+               assert_equal( dna.class, new_dna.class )
+       end
+
+       def test_Seq_NA_RNA_to_DNA_is_correct
+               rna = Seq::NA::RNA.new( "AUCGaucg" )
+               dna = rna.to_DNA
+
+               assert_equal( "ATCGatcg", dna.to_s )
        end
 end