git.donarmstrong.com Git - biopieces.git/commitdiff
added seq.rb
author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Mon, 29 Jun 2009 19:15:32 +0000 (19:15 +0000)
committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Mon, 29 Jun 2009 19:15:32 +0000 (19:15 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@544 74ccb610-7750-0410-82ae-013aeee3265d

code_ruby/Maasha/seq.rb [new file with mode: 0644]

diff --git a/code_ruby/Maasha/seq.rb b/code_ruby/Maasha/seq.rb
new file mode 100644 (file)
index 0000000..3324ff1
--- /dev/null
@@ -0,0 +1,100 @@
# Class containing generic sequence methods and nucleic acid and amino acid subclasses.
#
# NOTE: although Seq inherits from String, the residues are held in the separate
# +seq+ attribute. The constructor does not pass the sequence on to String, so the
# inherited String value stays empty and every method below works on +seq+ only.
class Seq < String
  # Residue alphabets keyed by upper-case sequence type.
  ALPHABETS = {
    'DNA'     => %w[A T C G].freeze,
    'RNA'     => %w[A U C G].freeze,
    'PROTEIN' => %w[F L S Y C W P H Q R I M T N K V A D E G].freeze
  }.freeze

  # Amino acid letters that are not nucleotide (ambiguity) codes; finding any of
  # them is taken as evidence of a protein sequence.
  PROTEIN_RESIDUES = 'FLPQIE'.freeze

  attr_accessor :seq, :seq_type

  # Method to initialize a new sequence.
  #
  # seq      - the residues as a String (may be nil and assigned later).
  # seq_type - optional sequence type label, stored as given.
  def initialize(seq = nil, seq_type = nil)
    @seq      = seq
    @seq_type = seq_type
  end

  # Guess the sequence type by analyzing the first 100 residues allowing for
  # ambiguity codes. The check is case-insensitive.
  #
  # Returns "PROTEIN", "RNA" or "DNA". "DNA" is the fallback, so an empty
  # sequence is reported as "DNA". Does not modify +seq_type+.
  def seq_type?
    head = @seq[0, 100].upcase

    return 'PROTEIN' if head.count(PROTEIN_RESIDUES) > 0
    return 'RNA'     if head.count('U') > 0

    'DNA'
  end

  # Method that return an array of the residue alphabet for a given sequence type.
  #
  # seq_type - 'DNA', 'RNA' or 'PROTEIN' in any case; symbols are accepted too.
  #
  # Side effect: on success the upper-cased type is stored in +seq_type+.
  # The argument itself is never modified.
  #
  # Returns a fresh Array of one-letter Strings (safe for the caller to modify).
  # Raises RuntimeError if the sequence type is not recognized.
  def seq_alph(seq_type)
    # Work on an upper-cased copy: upcase! would return nil for input that is
    # already upper-case and would also modify (or fail on a frozen) argument.
    type = seq_type.to_s.upcase
    alph = ALPHABETS[type]

    raise "ERROR: sequence type '#{type}' not recognized." unless alph

    @seq_type = type
    alph.dup
  end

  # Method to wrap a sequence to a given width using a given delimiter.
  #
  # width   - number of residues per line (must be greater than 0).
  # delimit - text inserted after every complete run of +width+ residues.
  #
  # The delimiter follows every full chunk, so a sequence whose length is an
  # exact multiple of +width+ ends with a trailing delimiter. Chunks never span
  # a newline already present in the sequence.
  #
  # Returns the wrapped copy; +seq+ is left untouched.
  # Raises RuntimeError if +width+ is not greater than 0.
  def wrap(width = 80, delimit = "\n")
    @width   = width
    @delimit = delimit

    # Block form: the delimiter is inserted verbatim, so backslashes in it are
    # not interpreted as back-references.
    @seq.gsub(wrap_pattern(width)) { |chunk| "#{chunk}#{delimit}" }
  end

  # Method to wrap and replace a sequence to a given width using a given delimiter.
  #
  # Same rules as #wrap, but modifies +seq+ in place.
  #
  # Returns +seq+, or nil when the sequence is shorter than +width+ and nothing
  # was changed (the usual convention of Ruby's bang methods).
  # Raises RuntimeError if +width+ is not greater than 0.
  def wrap!(width = 80, delimit = "\n")
    @width   = width
    @delimit = delimit

    @seq.gsub!(wrap_pattern(width)) { |chunk| "#{chunk}#{delimit}" }
  end

  # Method that generates a random sequence of a given length.
  #
  # seq_type - 'DNA', 'RNA' or 'PROTEIN' (see #seq_alph).
  # length   - number of residues to generate (must be greater than 0).
  #
  # Side effects: +seq_type+ is set to the upper-cased type and the requested
  # length is recorded on the instance; +seq+ itself is NOT replaced.
  #
  # Returns the generated sequence as a String.
  # Raises RuntimeError on a non-positive length or unknown sequence type.
  def generate(seq_type, length)
    raise "ERROR: length must be greater than 0 - not #{length}." if length <= 0

    alph    = seq_alph(seq_type) # also records the upper-cased type in @seq_type
    @length = length

    Array.new(length) { alph[rand(alph.size)] }.join
  end

  # Method that replaces sequence with a randomly generated sequence of a given length.
  #
  # Takes the same arguments and raises the same errors as #generate.
  #
  # Returns the new sequence, which is also stored in +seq+.
  def generate!(seq_type, length)
    @seq = generate(seq_type, length)
  end

  # Class containing methods specific for nucleic acid (NA) sequences.
  class NA < Seq
    # Residues translated by the complement methods: bases plus ambiguity codes,
    # in upper and lower case. U and T are both listed so mixed input is handled.
    COMPLEMENT_FROM = 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn'.freeze

    # Class containing methods specific for DNA sequences.
    class DNA < NA
      # Complements of COMPLEMENT_FROM, position by position, using T for A.
      COMPLEMENT_TO = 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn'.freeze

      # Method that complements DNA sequence including ambiguity codes.
      #
      # Modifies +seq+ in place; characters outside the table are left as is.
      #
      # Returns +seq+.
      def complement
        # tr! returns nil when it translated nothing, so return @seq explicitly.
        @seq.tr!(COMPLEMENT_FROM, COMPLEMENT_TO)
        @seq
      end
    end

    # Class containing methods specific for RNA sequences.
    class RNA < NA
      # Complements of COMPLEMENT_FROM, position by position, using U for A.
      COMPLEMENT_TO = 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn'.freeze

      # Method that complements RNA sequence including ambiguity codes.
      #
      # Modifies +seq+ in place; characters outside the table are left as is.
      #
      # Returns +seq+.
      def complement
        # tr! returns nil when it translated nothing, so return @seq explicitly.
        @seq.tr!(COMPLEMENT_FROM, COMPLEMENT_TO)
        @seq
      end
    end
  end

  # Class containing methods specific for amino acid (AA) sequences.
  class AA < Seq
    # TODO: orf call
  end

  private

  # Builds the regexp matching one full chunk of +width+ residues, validating
  # the width first (a width of 0 would match the empty string everywhere).
  def wrap_pattern(width)
    width = width.to_i

    raise "ERROR: width must be greater than 0 - not #{width}." unless width > 0

    /.{#{width}}/
  end
end