--- /dev/null
+# Class containing generic sequence methods and nucleic acid and amino acid subclasses.
+class Seq < String
+ attr_accessor :seq, :seq_type
+
+ # Method to initialize a new sequence.
+ def initialize( seq = nil, seq_type = nil )
+ @seq = seq
+ @seq_type = seq_type
+ end
+
+ # Guess the sequence type by analyzing the first 100 residues allowing for ambiguity codes.
+ def seq_type_guess
+ seq_beg = @seq[ 0, 100 ].upcase
+
+ if seq_beg.count( "FLPQIE" ) > 0
+ "AA"
+ elsif seq_beg.count( "U" ) > 0
+ "RNA"
+ else
+ "DNA"
+ end
+ end
+
+ # Guess and replace the sequence type by analyzing the first 100 residues allowing for ambiguity codes.
+ def seq_type_guess!
+ @seq_type = seq_type_guess
+ end
+
+ # Method that return an array of the residue alphabet for a given sequence type.
+ def seq_alph( seq_type )
+ case seq_type.upcase
+ when 'DNA'
+ %w{ A T C G }
+ when 'RNA'
+ %w{ A U C G }
+ when 'AA'
+ %w{ F L S Y C W P H Q R I M T N K V A D E G }
+ else
+ raise "ERROR: Sequence type '#{ seq_type }' not recognized."
+ end
+ end
+
+ # Method to wrap a sequence to a given width using a given delimiter.
+ def wrap( width = 80, delimit = "\n" )
+ raise "ERROR: Wrap width must be an integer." unless width.is_a? Fixnum
+ raise "ERROR: Cannot wrap sequence to negative width: #{ width }." if width <= 0
+ @seq.tr!( " \t\n\r", '' )
+ @seq.gsub( /.{#{ width }}/, "\\0#{ delimit }" )
+ end
+
+ # Method to wrap and replace a sequence to a given width using a given delimiter.
+ def wrap!( width = 80, delimit = "\n" )
+ @seq = wrap( width, delimit )
+ end
+
+ # Method that generates a random sequence of a given length.
+ def generate( length )
+ raise "ERROR: Length must be an integer." unless length.is_a? Fixnum
+ raise "ERROR: Cannot generate negative sequence length: #{ length }." if length <= 0
+
+ alph = seq_alph( @seq_type )
+ seq = Array.new( length ) { alph[ rand( alph.size ) ] }.join
+ end
+
+ # Method that replaces sequence with a randomly generated sequence of a given length.
+ def generate!( length )
+ @seq = generate( length )
+ end
+
+ # Class containing methods specific for amino acid (AA) sequences.
+ class AA < Seq
+ # Method to initialize a new amino acid sequence.
+ def initialize( seq = nil )
+ @seq = seq
+ @seq_type = "AA"
+ end
+
+ # Calculate the molecular weight of an amino acid seuqunce.
+ # The caluculation is only approximate since there is no correction
+ # for amino bond formation and the MW used are somewhat imprecise:
+ # http://www.expasy.ch/tools/pscale/Molecularweight.html
+ def mol_weight
+ mol_weight_aa = {
+ "A" => 89.000, # Ala
+ "R" => 174.000, # Arg
+ "N" => 132.000, # Asn
+ "D" => 133.000, # Asp
+ "C" => 121.000, # Cys
+ "Q" => 146.000, # Gln
+ "E" => 147.000, # Glu
+ "G" => 75.000, # Gly
+ "H" => 155.000, # His
+ "I" => 131.000, # Ile
+ "L" => 131.000, # Leu
+ "K" => 146.000, # Lys
+ "M" => 149.000, # Met
+ "F" => 165.000, # Phe
+ "P" => 115.000, # Pro
+ "S" => 105.000, # Ser
+ "T" => 119.000, # Thr
+ "W" => 204.000, # Trp
+ "Y" => 181.000, # Tyr
+ "V" => 117.000, # Val
+ }
+
+ mw = 0.0
+
+ @seq.upcase.each_char do |c|
+ raise "ERROR: Unknown amino acid: #{ c }" unless mol_weight_aa.include?( c )
+ mw += mol_weight_aa[ c ]
+ end
+
+ mw
+ end
+ end
+
+ # Class containing methods specific for nucleic acid (NA) sequences.
+ class NA < Seq
+ # Class containing methods specific for DNA sequences.
+ class DNA < NA
+ # Method that complements DNA sequence including ambiguity codes.
+ def complement
+ @seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn' )
+ end
+ end
+
+ # Class containing methods specific for RNA sequences.
+ class RNA < NA
+ # Method that complements RNA sequence including ambiguity codes.
+ def complement
+ @seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn' )
+ end
+ end
+ end
+end
+++ /dev/null
-# Class containing generic sequence methods and nucleic acid and amino acid subclasses.
-class Seq < String
- attr_accessor :seq, :seq_type
-
- # Method to initialize a new sequence.
- def initialize( seq = nil, seq_type = nil )
- @seq = seq
- @seq_type = seq_type
- end
-
- # Guess the sequence type by analyzing the first 100 residues allowing for ambiguity codes.
- def seq_type?
- seq_beg = @seq[ 0, 100 ].upcase
-
- if seq_beg.count( "FLPQIE" ) > 0
- "PROTEIN"
- elsif seq_beg.count( "U" ) > 0
- "RNA"
- else
- "DNA"
- end
- end
-
- # Method that return an array of the residue alphabet for a given sequence type.
- def seq_alph( seq_type )
- @seq_type = seq_type.upcase!
-
- case seq_type
- when 'DNA'
- %w{ A T C G }
- when 'RNA'
- %w{ A U C G }
- when 'PROTEIN'
- %w{ F L S Y C W P H Q R I M T N K V A D E G }
- else
- raise "ERROR: sequence type '#{ seq_type }' not recognized."
- end
- end
-
- # Method to wrap a sequence to a given width using a given delimiter.
- def wrap( width = 80, delimit = "\n" )
- @width = width
- @delimit = delimit
-
- @seq.gsub( /.{#{ width }}/, "\\0#{ delimit }" )
- end
-
- # Method to wrap and replace a sequence to a given width using a given delimiter.
- def wrap!( width = 80, delimit = "\n" )
- @width = width
- @delimit = delimit
-
- @seq.gsub!( /.{#{ width }}/, "\\0#{ delimit }" )
- end
-
- # Method that generates a random sequence of a given length.
- def generate( seq_type, length )
- @seq_type = seq_type
- @length = length
-
- raise "ERROR: length must be greater than 0 - not #{ length }." if length <= 0
-
- alph = seq_alph( seq_type )
- seq = ""
-
- seq = Array.new( length ) { alph[ rand( alph.size ) ] }.join
- end
-
- # Method that replaces sequence with a randomly generated sequence of a given length.
- def generate!( seq_type, length )
- @seq_type = seq_type
- @length = length
- @seq = generate( seq_type, length )
- end
-
- # Class containing methods specific for nucleic acid (NA) sequences.
- class NA < Seq
- # Class containing methods specific for DNA sequences.
- class DNA < NA
- # Method that complements DNA sequence including ambiguity codes.
- def complement
- @seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn' )
- end
- end
-
- # Class containing methods specific for RNA sequences.
- class RNA < NA
- # Method that complements RNA sequence including ambiguity codes.
- def complement
- @seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn' )
- end
- end
- end
-
- # Class containing methods specific for amino acid (AA) sequences.
- class AA < Seq
- # orf call
- end
-end
-