# Class containing generic sequence methods and nucleic acid and amino acid subclasses.
#
# The residues are held in the +seq+ attribute and the sequence type ("DNA", "RNA" or
# "AA") in the +seq_type+ attribute. All errors are raised as RuntimeError with a
# message starting with "ERROR:".
class Seq < String
  attr_accessor :seq, :seq_type

  # Whitespace characters that are removed from a sequence before it is wrapped.
  WHITESPACE = " \t\n\r".freeze

  # Method to initialize a new sequence.
  #
  # seq      - String of residues (optional).
  # seq_type - String naming the sequence type: "DNA", "RNA" or "AA" (optional).
  def initialize(seq = nil, seq_type = nil)
    @seq      = seq
    @seq_type = seq_type
  end

  # Guess the sequence type by analyzing the first 100 residues allowing for ambiguity codes.
  #
  # F, L, P, Q, I and E are amino acid letters that are not among the nucleotide
  # (ambiguity) codes handled by the complement methods below, so finding any of
  # them yields "AA". Otherwise a U yields "RNA", and everything else "DNA".
  #
  # Returns "AA", "RNA" or "DNA". Raises RuntimeError if no sequence is set.
  def seq_type_guess
    raise "ERROR: No sequence to guess the sequence type from." if @seq.nil?

    seq_beg = @seq[0, 100].upcase

    if seq_beg.count("FLPQIE") > 0
      "AA"
    elsif seq_beg.count("U") > 0
      "RNA"
    else
      "DNA"
    end
  end

  # Guess and replace the sequence type by analyzing the first 100 residues allowing for ambiguity codes.
  #
  # Returns the guessed sequence type, which is also stored in +seq_type+.
  def seq_type_guess!
    @seq_type = seq_type_guess
  end

  # Method that return an array of the residue alphabet for a given sequence type.
  #
  # seq_type - sequence type, matched case-insensitively: "DNA", "RNA" or "AA".
  #
  # Returns a new Array of single-letter Strings.
  # Raises RuntimeError if the sequence type is not recognized (including nil).
  def seq_alph(seq_type)
    # to_s lets a missing (nil) type fall through to the regular error below
    # instead of failing with a NoMethodError.
    case seq_type.to_s.upcase
    when 'DNA'
      %w[A T C G]
    when 'RNA'
      %w[A U C G]
    when 'AA'
      %w[F L S Y C W P H Q R I M T N K V A D E G]
    else
      raise "ERROR: Sequence type '#{seq_type}' not recognized."
    end
  end

  # Method to wrap a sequence to a given width using a given delimiter.
  #
  # Spaces, tabs, newlines and carriage returns are removed before wrapping, so an
  # already wrapped sequence can be wrapped again. The delimiter is appended after
  # every full-width chunk, i.e. also after the last chunk when the sequence length
  # is a multiple of the width. The stored sequence is left untouched; use wrap! to
  # replace it.
  #
  # width   - positive Integer number of residues per chunk (default 80).
  # delimit - String inserted after each full chunk (default newline).
  #
  # Returns the wrapped sequence as a new String.
  # Raises RuntimeError if width is not a positive Integer.
  def wrap(width = 80, delimit = "\n")
    # Integer instead of Fixnum: the Fixnum constant no longer exists in current Ruby.
    raise "ERROR: Wrap width must be an integer." unless width.is_a?(Integer)
    raise "ERROR: Cannot wrap sequence to negative width: #{width}." if width <= 0

    # The block form inserts the delimiter literally; in a replacement string any
    # backslash sequence in the delimiter would be read as a back-reference.
    @seq.delete(WHITESPACE).gsub(/.{#{width}}/) { |chunk| "#{chunk}#{delimit}" }
  end

  # Method to wrap and replace a sequence to a given width using a given delimiter.
  #
  # Takes the same arguments and raises the same errors as wrap.
  # Returns the wrapped sequence, which is also stored in +seq+.
  def wrap!(width = 80, delimit = "\n")
    @seq = wrap(width, delimit)
  end

  # Method that generates a random sequence of a given length.
  #
  # The residues are drawn from the alphabet of the current +seq_type+.
  #
  # length - positive Integer number of residues.
  #
  # Returns the generated sequence as a new String.
  # Raises RuntimeError if length is not a positive Integer or if the sequence
  # type is not recognized.
  def generate(length)
    raise "ERROR: Length must be an integer." unless length.is_a?(Integer)
    raise "ERROR: Cannot generate negative sequence length: #{length}." if length <= 0

    alph = seq_alph(@seq_type)

    Array.new(length) { alph[rand(alph.size)] }.join
  end

  # Method that replaces sequence with a randomly generated sequence of a given length.
  #
  # Takes the same argument and raises the same errors as generate.
  # Returns the generated sequence, which is also stored in +seq+.
  def generate!(length)
    @seq = generate(length)
  end

  # Class containing methods specific for amino acid (AA) sequences.
  class AA < Seq
    # Approximate molecular weights per amino acid, keyed by one-letter code.
    MOL_WEIGHT_AA = {
      "A" => 89.000,  # Ala
      "R" => 174.000, # Arg
      "N" => 132.000, # Asn
      "D" => 133.000, # Asp
      "C" => 121.000, # Cys
      "Q" => 146.000, # Gln
      "E" => 147.000, # Glu
      "G" => 75.000,  # Gly
      "H" => 155.000, # His
      "I" => 131.000, # Ile
      "L" => 131.000, # Leu
      "K" => 146.000, # Lys
      "M" => 149.000, # Met
      "F" => 165.000, # Phe
      "P" => 115.000, # Pro
      "S" => 105.000, # Ser
      "T" => 119.000, # Thr
      "W" => 204.000, # Trp
      "Y" => 181.000, # Tyr
      "V" => 117.000  # Val
    }.freeze

    # Method to initialize a new amino acid sequence.
    #
    # seq - String of amino acid residues (optional).
    def initialize(seq = nil)
      @seq      = seq
      @seq_type = "AA"
    end

    # Calculate the molecular weight of an amino acid sequence.
    # The calculation is only approximate since there is no correction
    # for amino bond formation and the MW used are somewhat imprecise:
    # http://www.expasy.ch/tools/pscale/Molecularweight.html
    #
    # Residues are looked up case-insensitively.
    #
    # Returns the summed weight as a Float (0.0 for an empty sequence).
    # Raises RuntimeError on a residue that is not in the weight table.
    def mol_weight
      mw = 0.0

      @seq.upcase.each_char do |c|
        raise "ERROR: Unknown amino acid: #{c}" unless MOL_WEIGHT_AA.key?(c)

        mw += MOL_WEIGHT_AA[c]
      end

      mw
    end
  end

  # Class containing methods specific for nucleic acid (NA) sequences.
  class NA < Seq
    # Residue codes (including ambiguity codes, both cases) that can be complemented.
    RESIDUE_CODES = 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn'.freeze

    # Class containing methods specific for DNA sequences.
    class DNA < NA
      # Complements of RESIDUE_CODES, position by position, for DNA.
      COMPLEMENT_CODES = 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn'.freeze

      # Method to initialize a new DNA sequence.
      #
      # seq      - String of residues (optional).
      # seq_type - sequence type; defaults to "DNA" so that generate works
      #            without setting the type first.
      def initialize(seq = nil, seq_type = "DNA")
        super
      end

      # Method that complements DNA sequence including ambiguity codes.
      #
      # The complement replaces the stored sequence. The String object that was
      # originally handed to the sequence is not modified.
      #
      # Returns the complemented sequence, also when no residue changed.
      def complement
        complement_with(COMPLEMENT_CODES)
      end
    end

    # Class containing methods specific for RNA sequences.
    class RNA < NA
      # Complements of RESIDUE_CODES, position by position, for RNA.
      COMPLEMENT_CODES = 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn'.freeze

      # Method to initialize a new RNA sequence.
      #
      # seq      - String of residues (optional).
      # seq_type - sequence type; defaults to "RNA" so that generate works
      #            without setting the type first.
      def initialize(seq = nil, seq_type = "RNA")
        super
      end

      # Method that complements RNA sequence including ambiguity codes.
      #
      # The complement replaces the stored sequence. The String object that was
      # originally handed to the sequence is not modified.
      #
      # Returns the complemented sequence, also when no residue changed.
      def complement
        complement_with(COMPLEMENT_CODES)
      end
    end

    private

    # Replace the stored sequence by its translation through the given complement table.
    # tr is used instead of tr! because tr! returns nil when nothing was changed.
    def complement_with(codes)
      @seq = @seq.tr(RESIDUE_CODES, codes)
    end
  end
end