# Ruby source recovered from the git patch "added seq.rb" (author
# martinahansen, Mon, 29 Jun 2009, biopieces svn trunk@544), which created
# the file code_ruby/Maasha/seq.rb.

# Class containing generic sequence methods and nucleic acid and amino acid subclasses.
#
# The residues are held in the +seq+ attribute and the sequence type
# ("DNA", "RNA" or "PROTEIN") in the +seq_type+ attribute. Although the class
# inherits from String, +initialize+ does not pass the sequence on to the
# superclass, so all methods below operate on +seq+ and not on +self+.
class Seq < String
  attr_accessor :seq, :seq_type

  # Method to initialize a new sequence.
  #
  # seq      - String of residues (default nil).
  # seq_type - String naming the sequence type (default nil).
  def initialize(seq = nil, seq_type = nil)
    @seq      = seq
    @seq_type = seq_type
  end

  # Guess the sequence type by analyzing the first 100 residues.
  #
  # Any of the letters F, L, P, Q, I or E means "PROTEIN" (none of these
  # letters occur in the nucleotide/ambiguity tables used by the complement
  # methods in this file); otherwise any U means "RNA"; otherwise "DNA".
  # The check is case insensitive.
  #
  # Returns the guessed type as a String. Raises NoMethodError if no sequence
  # has been set (+seq+ is nil).
  def seq_type?
    seq_beg = @seq[0, 100].upcase

    if seq_beg.count('FLPQIE') > 0
      'PROTEIN'
    elsif seq_beg.count('U') > 0
      'RNA'
    else
      'DNA'
    end
  end

  # Method that return an array of the residue alphabet for a given sequence type.
  #
  # seq_type - 'DNA', 'RNA' or 'PROTEIN' in any letter case. The argument is
  #            not modified.
  #
  # On success the upper-cased type is also stored in +seq_type+.
  # Raises RuntimeError if the type is not recognized.
  def seq_alph(seq_type)
    # Use the non-destructive upcase: upcase! returns nil when the string is
    # already upper case, and it would modify (or fail on a frozen) argument.
    type = seq_type.to_s.upcase

    alph = case type
           when 'DNA'
             %w[A T C G]
           when 'RNA'
             %w[A U C G]
           when 'PROTEIN'
             %w[F L S Y C W P H Q R I M T N K V A D E G]
           else
             raise "ERROR: sequence type '#{type}' not recognized."
           end

    @seq_type = type
    alph
  end

  # Method to wrap a sequence to a given width using a given delimiter.
  #
  # width   - number of residues per chunk (default 80); must be greater than 0.
  # delimit - String inserted after every full chunk of +width+ residues
  #           (default "\n"). It is inserted literally, backslashes included.
  #
  # Returns a new String; +seq+ is left untouched. Note that a delimiter is
  # also inserted after a final chunk that is exactly +width+ residues long.
  # Raises RuntimeError if width is not greater than 0.
  def wrap(width = 80, delimit = "\n")
    size = width.to_i
    raise "ERROR: width must be greater than 0 - not #{width}." if size <= 0

    # Block form so the delimiter is never parsed as a replacement template.
    @seq.gsub(/.{#{size}}/) { |chunk| chunk + delimit }
  end

  # Method to wrap and replace a sequence to a given width using a given delimiter.
  #
  # Same parameters and wrapping rules as +wrap+, but +seq+ is modified in
  # place. Returns the wrapped sequence, or nil if the sequence was shorter
  # than +width+ so that nothing was inserted (String#gsub! semantics).
  # Raises RuntimeError if width is not greater than 0.
  def wrap!(width = 80, delimit = "\n")
    size = width.to_i
    raise "ERROR: width must be greater than 0 - not #{width}." if size <= 0

    @seq.gsub!(/.{#{size}}/) { |chunk| chunk + delimit }
  end

  # Method that generates a random sequence of a given length.
  #
  # seq_type - 'DNA', 'RNA' or 'PROTEIN' (any letter case).
  # length   - number of residues; must be greater than 0.
  #
  # Returns the random sequence as a String. +seq+ is not changed, but
  # +seq_type+ is set to the upper-cased type as a side effect of +seq_alph+.
  # Raises RuntimeError on a non-positive length or an unrecognized type.
  def generate(seq_type, length)
    raise "ERROR: length must be greater than 0 - not #{length}." if length <= 0

    alph = seq_alph(seq_type)

    Array.new(length) { alph[rand(alph.size)] }.join
  end

  # Method that replaces sequence with a randomly generated sequence of a given length.
  #
  # Takes the same arguments and raises the same errors as +generate+.
  # Stores the result in +seq+ (and the upper-cased type in +seq_type+) and
  # returns the new sequence.
  def generate!(seq_type, length)
    @seq = generate(seq_type, length)
  end

  # Class containing methods specific for nucleic acid (NA) sequences.
  class NA < Seq
    # Class containing methods specific for DNA sequences.
    class DNA < NA
      # Method that complements DNA sequence including ambiguity codes.
      #
      # +seq+ is modified in place (letter case is preserved; U is treated
      # like T and becomes A). Always returns the sequence, also when no
      # residue needed changing (String#tr! alone would return nil then).
      def complement
        @seq.tr!('AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn')
        @seq
      end
    end

    # Class containing methods specific for RNA sequences.
    class RNA < NA
      # Method that complements RNA sequence including ambiguity codes.
      #
      # +seq+ is modified in place (letter case is preserved; T is treated
      # like U and becomes A). Always returns the sequence, also when no
      # residue needed changing (String#tr! alone would return nil then).
      def complement
        @seq.tr!('AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn')
        @seq
      end
    end
  end

  # Class containing methods specific for amino acid (AA) sequences.
  class AA < Seq
    # orf call
  end
end