added unit testing for ruby

author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>

Tue, 30 Jun 2009 17:01:34 +0000 (17:01 +0000)

committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>

Tue, 30 Jun 2009 17:01:34 +0000 (17:01 +0000)
author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Tue, 30 Jun 2009 17:01:34 +0000 (17:01 +0000)
committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Tue, 30 Jun 2009 17:01:34 +0000 (17:01 +0000)
diff --git a/code_ruby/Maasha/lib/seq.rb b/code_ruby/Maasha/lib/seq.rb

new file mode 100644 (file)

index 0000000..ddfd4cd
--- /dev/null
+++ b/code_ruby/Maasha/lib/seq.rb
@@ -0,0 +1,135 @@
+# Class containing generic sequence methods and nucleic acid and amino acid subclasses.
+class Seq < String
+       attr_accessor :seq, :seq_type
+
+       # Method to initialize a new sequence.
+       def initialize( seq = nil, seq_type = nil )
+               @seq      = seq
+               @seq_type = seq_type
+       end
+
+       # Guess the sequence type by analyzing the first 100 residues allowing for ambiguity codes.
+       def seq_type_guess
+               seq_beg = @seq[ 0, 100 ].upcase
+
+               if seq_beg.count( "FLPQIE" ) > 0
+                       "AA"
+               elsif seq_beg.count( "U" ) > 0
+                       "RNA"
+               else
+                       "DNA"
+               end
+       end
+
+       # Guess and replace the sequence type by analyzing the first 100 residues allowing for ambiguity codes.
+       def seq_type_guess!
+               @seq_type = seq_type_guess
+       end
+
+       # Method that returns an array of the residue alphabet for a given sequence type.
+       def seq_alph( seq_type )
+               case seq_type.upcase
+               when 'DNA'
+                       %w{ A T C G }
+               when 'RNA'
+                       %w{ A U C G }
+               when 'AA'
+                       %w{ F L S Y C W P H Q R I M T N K V A D E G }
+               else
+                       raise "ERROR: Sequence type '#{ seq_type }' not recognized."
+               end
+       end
+
+       # Method to wrap a sequence to a given width using a given delimiter.
+       def wrap( width = 80, delimit = "\n" )
+               raise "ERROR: Wrap width must be an integer." unless width.is_a? Fixnum
+               raise "ERROR: Cannot wrap sequence to negative width: #{ width }." if width <= 0
+               @seq.tr!( " \t\n\r", '' )
+               @seq.gsub( /.{#{ width }}/, "\\0#{ delimit }" )
+       end
+
+       # Method to wrap and replace a sequence to a given width using a given delimiter.
+       def wrap!( width = 80, delimit = "\n" )
+               @seq = wrap( width, delimit )
+       end
+
+       # Method that generates a random sequence of a given length.
+       def generate( length )
+               raise "ERROR: Length must be an integer." unless length.is_a? Fixnum
+               raise "ERROR: Cannot generate negative sequence length: #{ length }." if length <= 0
+
+               alph = seq_alph( @seq_type )
+               seq  = Array.new( length ) { alph[ rand( alph.size ) ] }.join
+       end
+
+       # Method that replaces sequence with a randomly generated sequence of a given length.
+       def generate!( length )
+               @seq = generate( length )
+       end
+
+       # Class containing methods specific for amino acid (AA) sequences.
+       class AA < Seq
+               # Method to initialize a new amino acid sequence.
+               def initialize( seq = nil )
+                       @seq      = seq
+                       @seq_type = "AA"
+               end
+
+               # Calculate the molecular weight of an amino acid sequence.
+               # The calculation is only approximate since there is no correction
+               # for peptide bond formation and the MW values used are somewhat imprecise:
+               # http://www.expasy.ch/tools/pscale/Molecularweight.html
+               def mol_weight
+                       mol_weight_aa = {
+                               "A" => 89.000,    # Ala
+                               "R" => 174.000,   # Arg
+                               "N" => 132.000,   # Asn
+                               "D" => 133.000,   # Asp
+                               "C" => 121.000,   # Cys
+                               "Q" => 146.000,   # Gln
+                               "E" => 147.000,   # Glu
+                               "G" => 75.000,    # Gly
+                               "H" => 155.000,   # His
+                               "I" => 131.000,   # Ile
+                               "L" => 131.000,   # Leu
+                               "K" => 146.000,   # Lys
+                               "M" => 149.000,   # Met
+                               "F" => 165.000,   # Phe
+                               "P" => 115.000,   # Pro
+                               "S" => 105.000,   # Ser
+                               "T" => 119.000,   # Thr
+                               "W" => 204.000,   # Trp
+                               "Y" => 181.000,   # Tyr
+                               "V" => 117.000,   # Val
+                       }
+
+                       mw = 0.0
+
+                       @seq.upcase.each_char do |c|
+                               raise "ERROR: Unknown amino acid: #{ c }" unless mol_weight_aa.include?( c )
+                               mw += mol_weight_aa[ c ]
+                       end
+
+                       mw
+               end
+       end
+
+       # Class containing methods specific for nucleic acid (NA) sequences.
+       class NA < Seq
+               # Class containing methods specific for DNA sequences.
+               class DNA < NA
+                       # Method that complements DNA sequence including ambiguity codes.
+                       def complement
+                               @seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn' )
+                       end
+               end                     
+
+               # Class containing methods specific for RNA sequences.
+               class RNA < NA
+                       # Method that complements RNA sequence including ambiguity codes.
+                       def complement
+                               @seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn' )
+                       end
+               end
+       end
+end
diff --git a/code_ruby/Maasha/seq.rb b/code_ruby/Maasha/seq.rb

deleted file mode 100644 (file)

index 3324ff1..0000000
--- a/code_ruby/Maasha/seq.rb
+++ /dev/null
@@ -1,100 +0,0 @@
-# Class containing generic sequence methods and nucleic acid and amino acid subclasses.
-class Seq < String
-       attr_accessor :seq, :seq_type
-
-       # Method to initialize a new sequence.
-       def initialize( seq = nil, seq_type = nil )
-               @seq      = seq
-               @seq_type = seq_type
-       end
-
-       # Guess the sequence type by analyzing the first 100 residues allowing for ambiguity codes.
-       def seq_type?
-               seq_beg = @seq[ 0, 100 ].upcase
-
-               if seq_beg.count( "FLPQIE" ) > 0
-                       "PROTEIN"
-               elsif seq_beg.count( "U" ) > 0
-                       "RNA"
-               else
-                       "DNA"
-               end
-       end
-
-       # Method that return an array of the residue alphabet for a given sequence type.
-       def seq_alph( seq_type )
-               @seq_type = seq_type.upcase!
-
-               case seq_type
-               when 'DNA'
-                       %w{ A T C G }
-               when 'RNA'
-                       %w{ A U C G }
-               when 'PROTEIN'
-                       %w{ F L S Y C W P H Q R I M T N K V A D E G }
-               else
-                       raise "ERROR: sequence type '#{ seq_type }' not recognized."
-               end
-       end
-
-       # Method to wrap a sequence to a given width using a given delimiter.
-       def wrap( width = 80, delimit = "\n" )
-               @width   = width
-               @delimit = delimit
-
-               @seq.gsub( /.{#{ width }}/, "\\0#{ delimit }" )
-       end
-
-       # Method to wrap and replace a sequence to a given width using a given delimiter.
-       def wrap!( width = 80, delimit = "\n" )
-               @width   = width
-               @delimit = delimit
-
-               @seq.gsub!( /.{#{ width }}/, "\\0#{ delimit }" )
-       end
-
-       # Method that generates a random sequence of a given length.
-       def generate( seq_type, length )
-               @seq_type = seq_type
-               @length   = length
-
-               raise "ERROR: length must be greater than 0 - not #{ length }." if length <= 0
-
-               alph = seq_alph( seq_type )
-               seq  = ""
-
-               seq = Array.new( length ) { alph[ rand( alph.size ) ] }.join
-       end
-
-       # Method that replaces sequence with a randomly generated sequence of a given length.
-       def generate!( seq_type, length )
-               @seq_type = seq_type
-               @length   = length
-               @seq      = generate( seq_type, length )
-       end
-
-       # Class containing methods specific for nucleic acid (NA) sequences.
-       class NA < Seq
-               # Class containing methods specific for DNA sequences.
-               class DNA < NA
-                       # Method that complements DNA sequence including ambiguity codes.
-                       def complement
-                               @seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn' )
-                       end
-               end                     
-
-               # Class containing methods specific for RNA sequences.
-               class RNA < NA
-                       # Method that complements RNA sequence including ambiguity codes.
-                       def complement
-                               @seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn' )
-                       end
-               end
-       end
-
-       # Class containing methods specific for amino acid (AA) sequences.
-       class AA < Seq
-               # orf call
-       end
-end
-
author	martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
	Tue, 30 Jun 2009 17:01:34 +0000 (17:01 +0000)
committer	martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
	Tue, 30 Jun 2009 17:01:34 +0000 (17:01 +0000)
code_ruby/Maasha/lib/seq.rb	[new file with mode: 0644]	patch \| blob
code_ruby/Maasha/seq.rb	[deleted file]	patch \| blob \| history