# Method to find open reading frames (ORFs).
#
# An ORF starts at a match of one of +start_codons+ and runs up to and
# including the first downstream match of one of +stop_codons+ that lies in
# the same reading frame as the start, i.e. at a multiple of 3 bases from it.
#
# size_min     - minimum ORF length in bases, start and stop codon included.
# size_max     - maximum ORF length in bases, start and stop codon included.
# start_codons - list of start codon strings; joined with '|' into a regex.
# stop_codons  - list of stop codon strings; joined with '|' into a regex.
# pick_longest - when true, only the longest accepted ORF is kept for each
#                stop position (several in-frame starts can share one stop).
#
# Each ORF is reported as [subseq, pos_beg, pos_end] where subseq is the
# result of self.subseq(pos_beg, length), pos_beg is the 0-based index of the
# first base of the start codon, and pos_end is the index just past the stop
# codon. ORFs are reported in order of increasing start position.
#
# With a block each ORF is yielded in turn; the list of ORFs is returned in
# both cases.
def each_orf(size_min, size_max, start_codons, stop_codons, pick_longest = false)
  orfs    = []
  pos_beg = 0

  regex_start = Regexp.new(start_codons.join('|'))
  regex_stop  = Regexp.new(stop_codons.join('|'))

  # <= (not <) so that an ORF of exactly size_min bases that ends on the last
  # base of the sequence is still considered.
  while pos_beg <= self.length - size_min
    pos_beg = self.seq.index(regex_start, pos_beg)

    break if pos_beg.nil?

    # Look for the stop codon after the start codon and skip any match that is
    # out of frame with the start - such a match is not read as a codon.
    pos_end = self.seq.index(regex_stop, pos_beg + 3)

    until pos_end.nil? || ((pos_end - pos_beg) % 3).zero?
      pos_end = self.seq.index(regex_stop, pos_end + 1)
    end

    if pos_end
      length = (pos_end - pos_beg) + 3

      if size_min <= length && length <= size_max
        orfs << [self.subseq(pos_beg, length), pos_beg, pos_end + 3]
      end
    end

    # Resume the search one base downstream so overlapping starts are found.
    pos_beg += 1
  end

  if pick_longest
    orf_hash = {}

    # ORFs were collected by increasing start position, so the first one seen
    # for a given stop position is the longest - keep it, ignore later ones.
    orfs.each { |orf| orf_hash[orf.last] ||= orf }

    orfs = orf_hash.values
  end

  return orfs unless block_given?

  orfs.each { |orf| yield orf }
end