From: martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Date: Mon, 29 Jun 2009 19:15:32 +0000 (+0000)
Subject: added seq.rb
X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=137de779c1ba1c4afeff69119461119032a89525;p=biopieces.git

added seq.rb

git-svn-id: http://biopieces.googlecode.com/svn/trunk@544 74ccb610-7750-0410-82ae-013aeee3265d
---

diff --git a/code_ruby/Maasha/seq.rb b/code_ruby/Maasha/seq.rb
new file mode 100644
index 0000000..3324ff1
--- /dev/null
+++ b/code_ruby/Maasha/seq.rb
@@ -0,0 +1,100 @@
+# Class containing generic sequence methods and nucleic acid and amino acid subclasses.
+# NOTE: Seq inherits from String, but the residues are held in @seq - not in the String itself.
+class Seq < String
+	attr_accessor :seq, :seq_type
+
+	# Method to initialize a new sequence from an optional residue string and sequence type.
+	def initialize( seq = nil, seq_type = nil )
+		@seq      = seq
+		@seq_type = seq_type
+	end
+
+	# Guess the sequence type (PROTEIN, RNA or DNA) by analyzing the first 100 residues.
+	# E, F, I, L, P and Q are not nucleotide ambiguity codes, so any of these implies protein.
+	def seq_type?
+		seq_beg = @seq[ 0, 100 ].upcase
+		if seq_beg.count( "FLPQIE" ) > 0
+			"PROTEIN"
+		elsif seq_beg.count( "U" ) > 0
+			"RNA"
+		else
+			"DNA"
+		end
+	end
+
+	# Method that returns an array of the residue alphabet for a given sequence type.
+	# The type is matched case-insensitively and recorded, upcased, in @seq_type.
+	# Raises a RuntimeError if the type is not DNA, RNA or PROTEIN.
+	def seq_alph( seq_type )
+		# upcase - not upcase! - which returns nil for upcased input and modifies the argument.
+		@seq_type = seq_type.to_s.upcase
+		case @seq_type
+		when 'DNA'
+			%w{ A T C G }
+		when 'RNA'
+			%w{ A U C G }
+		when 'PROTEIN'
+			%w{ F L S Y C W P H Q R I M T N K V A D E G }
+		else
+			raise "ERROR: sequence type '#{ seq_type }' not recognized."
+		end
+	end
+
+	# Method to wrap a sequence to a given width, adding the delimiter after each full-width chunk.
+	def wrap( width = 80, delimit = "\n" )
+		@width   = width
+		@delimit = delimit
+		@seq.gsub( /.{#{ width }}/, "\\0#{ delimit }" )
+	end
+
+	# Method to wrap a sequence in place (see wrap) - the wrapped sequence is always returned.
+	def wrap!( width = 80, delimit = "\n" )
+		@width   = width
+		@delimit = delimit
+		# gsub! returns nil when nothing was substituted (sequence shorter than width).
+		@seq.gsub!( /.{#{ width }}/, "\\0#{ delimit }" ) || @seq
+	end
+
+	# Method that generates and returns a random sequence of a given type and length.
+	# Raises a RuntimeError if length is not greater than 0 or the type is not recognized.
+	def generate( seq_type, length )
+		@seq_type = seq_type
+		@length   = length
+		raise "ERROR: length must be greater than 0 - not #{ length }." if length <= 0
+
+		alph = seq_alph( seq_type )
+		Array.new( length ) { alph[ rand( alph.size ) ] }.join
+	end
+
+	# Method that replaces sequence with a randomly generated sequence of a given length.
+	def generate!( seq_type, length )
+		@seq = generate( seq_type, length )
+	end
+
+	# Class containing methods specific for nucleic acid (NA) sequences.
+	class NA < Seq
+		# Class containing methods specific for DNA sequences.
+		class DNA < NA
+			# Method that complements DNA sequence in place, including ambiguity codes, and returns it.
+			def complement
+				# tr! returns nil when no residue was changed (e.g. all N) - fall back to @seq.
+				@seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn' ) || @seq
+			end
+		end
+
+		# Class containing methods specific for RNA sequences.
+		class RNA < NA
+			# Method that complements RNA sequence in place, including ambiguity codes, and returns it.
+			def complement
+				# tr! returns nil when no residue was changed (e.g. all N) - fall back to @seq.
+				@seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn' ) || @seq
+			end
+		end
+	end
+
+	# Class containing methods specific for amino acid (AA) sequences.
+	class AA < Seq
+		# orf call
+	end
+end
+