1 # Class containing generic sequence methods and nucleic acid and amino acid subclasses.
3 attr_accessor :seq, :seq_type
5 # Method to initialize a new sequence.
6 def initialize( seq = "", seq_type = nil )
11 # Guess the sequence type by analyzing the first 100 residues allowing for ambiguity codes.
13 seq_beg = @seq[ 0, 100 ].upcase
15 if seq_beg.count( "FLPQIE" ) > 0
17 elsif seq_beg.count( "U" ) > 0
24 # Guess and replace the sequence type by analyzing the first 100 residues allowing for ambiguity codes.
26 @seq_type = seq_type_guess
29 # Method that returns an array of the residue alphabet for a given sequence type.
30 def seq_alph( seq_type )
32 :DNA => %w{ A T C G },
33 :RNA => %w{ A U C G },
34 :AA => %w{ F L S Y C W P H Q R I M T N K V A D E G },
37 raise "ERROR: Sequence type '#{ seq_type }' not recognized." unless hash.include?( seq_type )
38 return hash[ seq_type ]
41 # Method to wrap a sequence to a given width using a given delimiter.
42 def wrap( width = 80, delimit = "\n" )
43 raise "ERROR: Wrap width must be an integer." unless width.is_a? Fixnum
44 raise "ERROR: Cannot wrap sequence to negative width: #{ width }." if width <= 0
45 @seq.tr!( " \t\n\r", '' )
46 @seq.gsub( /.{#{ width }}/, "\\0#{ delimit }" ).sub( /#{ delimit }$/, "" )
49 # Method to wrap and replace a sequence to a given width using a given delimiter.
50 def wrap!( width = 80, delimit = "\n" )
51 @seq = wrap( width, delimit )
54 # Method that generates a random sequence of a given length.
55 def generate( length )
56 raise "ERROR: Length must be an integer." unless length.is_a? Fixnum
57 raise "ERROR: Cannot generate negative sequence length: #{ length }." if length <= 0
59 alph = seq_alph( @seq_type )
60 seq = Array.new( length ) { alph[ rand( alph.size ) ] }.join
63 # Method that replaces sequence with a randomly generated sequence of a given length.
64 def generate!( length )
65 @seq = generate( length )
68 # Class containing methods specific for amino acid (AA) sequences.
70 # Method to initialize a new amino acid sequence.
71 def initialize( seq = "" )
76 # Calculate the molecular weight of an amino acid sequence.
77 # The calculation is only approximate since there is no correction
78 # for peptide bond formation and the molecular weights used are somewhat imprecise:
79 # http://www.expasy.ch/tools/pscale/Molecularweight.html
100 "Y" => 181.000, # Tyr
101 "V" => 117.000, # Val
106 @seq.upcase.each_char do |c|
107 raise "ERROR: Unknown amino acid: #{ c }" unless mol_weight_aa.include?( c )
108 mw += mol_weight_aa[ c ]
115 # Class containing methods specific for nucleic acid (NA) sequences.
117 # Class containing methods specific for DNA sequences.
119 # Method that complements DNA sequence including ambiguity codes.
121 @seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn' )
125 # Class containing methods specific for RNA sequences.
127 # Method that complements RNA sequence including ambiguity codes.
129 @seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn' )