- attr_accessor :seq, :seq_type
-
- # Method to initialize a new sequence.
- def initialize( seq = "", seq_type = nil )
- @seq = seq
- @seq_type = seq_type
- end
-
- # Guess the sequence type by analyzing the first 100 residues allowing for ambiguity codes.
- def seq_type_guess
- seq_beg = @seq[ 0, 100 ].upcase
-
- if seq_beg.count( "FLPQIE" ) > 0
- "AA"
- elsif seq_beg.count( "U" ) > 0
- "RNA"
- else
- "DNA"
- end
- end
-
- # Guess and replace the sequence type by analyzing the first 100 residues allowing for ambiguity codes.
- def seq_type_guess!
- @seq_type = seq_type_guess
- end
-
- # Method that return an array of the residue alphabet for a given sequence type.
- def seq_alph( seq_type )
- case seq_type.upcase
- when 'DNA'
- %w{ A T C G }
- when 'RNA'
- %w{ A U C G }
- when 'AA'
- %w{ F L S Y C W P H Q R I M T N K V A D E G }
- else
- raise "ERROR: Sequence type '#{ seq_type }' not recognized."
- end
- end
-
- # Method to wrap a sequence to a given width using a given delimiter.
- def wrap( width = 80, delimit = "\n" )
- raise "ERROR: Wrap width must be an integer." unless width.is_a? Fixnum
- raise "ERROR: Cannot wrap sequence to negative width: #{ width }." if width <= 0
- @seq.tr!( " \t\n\r", '' )
- @seq.gsub( /.{#{ width }}/, "\\0#{ delimit }" ).sub( /#{ delimit }$/, "" )
- end
-
- # Method to wrap and replace a sequence to a given width using a given delimiter.
- def wrap!( width = 80, delimit = "\n" )
- @seq = wrap( width, delimit )
- end
-
- # Method that generates a random sequence of a given length.
- def generate( length )
- raise "ERROR: Length must be an integer." unless length.is_a? Fixnum
- raise "ERROR: Cannot generate negative sequence length: #{ length }." if length <= 0
-
- alph = seq_alph( @seq_type )
- seq = Array.new( length ) { alph[ rand( alph.size ) ] }.join
- end
-
- # Method that replaces sequence with a randomly generated sequence of a given length.
- def generate!( length )
- @seq = generate( length )
- end
-
- # Class containing methods specific for amino acid (AA) sequences.
- class AA < Seq
- # Method to initialize a new amino acid sequence.
- def initialize( seq = "" )
- @seq = seq
- @seq_type = "AA"
- end
-
- # Calculate the molecular weight of an amino acid seuqunce.
- # The caluculation is only approximate since there is no correction
- # for amino bond formation and the MW used are somewhat imprecise:
- # http://www.expasy.ch/tools/pscale/Molecularweight.html
- def mol_weight
- mol_weight_aa = {
- "A" => 89.000, # Ala
- "R" => 174.000, # Arg
- "N" => 132.000, # Asn
- "D" => 133.000, # Asp
- "C" => 121.000, # Cys
- "Q" => 146.000, # Gln
- "E" => 147.000, # Glu
- "G" => 75.000, # Gly
- "H" => 155.000, # His
- "I" => 131.000, # Ile
- "L" => 131.000, # Leu
- "K" => 146.000, # Lys
- "M" => 149.000, # Met
- "F" => 165.000, # Phe
- "P" => 115.000, # Pro
- "S" => 105.000, # Ser
- "T" => 119.000, # Thr
- "W" => 204.000, # Trp
- "Y" => 181.000, # Tyr
- "V" => 117.000, # Val
- }
-
- mw = 0.0
-
- @seq.upcase.each_char do |c|
- raise "ERROR: Unknown amino acid: #{ c }" unless mol_weight_aa.include?( c )
- mw += mol_weight_aa[ c ]
- end
-
- mw
- end
- end
-
- # Class containing methods specific for nucleic acid (NA) sequences.
- class NA < Seq
- # Class containing methods specific for DNA sequences.
- class DNA < NA
- # Method that complements DNA sequence including ambiguity codes.
- def complement
- @seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn' )
- end
- end
-
- # Class containing methods specific for RNA sequences.
- class RNA < NA
- # Method that complements RNA sequence including ambiguity codes.
- def complement
- @seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn' )
- end
- end
- end
-end
+ # Guess the sequence type by analyzing the first 100 residues allowing for ambiguity codes.
+ def guess_type
+ raise ArgumentError, "No sequence." if self.empty?
+
+ seq_beg = self[0, 100].upcase
+
+ if seq_beg.count( "FLPQIE" ) > 0
+ Seq::AA.new(self)
+ elsif seq_beg.count("U") > 0
+ Seq::NA::RNA.new(self)
+ else
+ Seq::NA::DNA.new(self)
+ end
+ end
+
+ # Method to wrap a sequence to a given width using a given delimiter.
+ def wrap(width = 80, delimit = $/)
+ raise ArgumentError, "Cannot wrap sequence to negative width: #{width}." if width <= 0
+
+ self.delete!(" \t\n\r")
+ self.gsub(/.{#{width}}(?!$)/, "\\0#{delimit}")
+ end
+
+ # Method to wrap and replace a sequence to a given width using a given delimiter.
+ def wrap!(width = 80, delimit = $/)
+ self.replace(self.wrap(width, delimit))
+ end
+
+ # Method that generates a random sequence of a given length.
+ def generate(length)
+ raise ArgumentError, "Cannot generate negative sequence length: #{length}." if length <= 0
+
+ alph = self.residues
+ Array.new(length) { alph[rand(alph.size)] }.join("")
+ end
+
+ # Method that replaces sequence with a randomly generated sequence of a given length.
+ def generate!(length)
+ self.replace(self.generate(length))
+ end
+
+ # Class containing methods specific for amino acid (AA) sequences.
+ class AA < Seq
+ # Method that returns an array of amino acid residues.
+ def residues
+ %w{ F L S Y C W P H Q R I M T N K V A D E G }
+ end
+
+ # Calculate the molecular weight of an amino acid seuqunce.
+ # The caluculation is only approximate since there is no correction
+ # for amino bond formation and the MW used are somewhat imprecise:
+ # http://www.expasy.ch/tools/pscale/Molecularweight.html
+ def mol_weight
+ raise ArgumentError, "invalid residues found: #{self.delete("#{residues.join( "" )}")}" if self.upcase =~ /[^#{residues.join( "" )}]/
+
+ mol_weight_aa = {
+ "A" => 89.000, # Ala
+ "R" => 174.000, # Arg
+ "N" => 132.000, # Asn
+ "D" => 133.000, # Asp
+ "C" => 121.000, # Cys
+ "Q" => 146.000, # Gln
+ "E" => 147.000, # Glu
+ "G" => 75.000, # Gly
+ "H" => 155.000, # His
+ "I" => 131.000, # Ile
+ "L" => 131.000, # Leu
+ "K" => 146.000, # Lys
+ "M" => 149.000, # Met
+ "F" => 165.000, # Phe
+ "P" => 115.000, # Pro
+ "S" => 105.000, # Ser
+ "T" => 119.000, # Thr
+ "W" => 204.000, # Trp
+ "Y" => 181.000, # Tyr
+ "V" => 117.000, # Val
+ }
+
+ mw = 0.0
+
+ self.upcase.each_char { |c| mw += mol_weight_aa[ c ] }
+
+ mw
+ end
+ end