git.donarmstrong.com Git - biopieces.git/commitdiff
added orf finder to seq.rb
author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Fri, 13 Jul 2012 13:51:58 +0000 (13:51 +0000)
committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Fri, 13 Jul 2012 13:51:58 +0000 (13:51 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1866 74ccb610-7750-0410-82ae-013aeee3265d

code_ruby/lib/maasha/seq.rb

index 55c9c1f31b9cf7b2adabd14e65436766474bf580..8592cdc3348e1a9ffb71128d640f5c6da8f0e7ec 100644 (file)
@@ -324,7 +324,6 @@ class Seq
     raise SeqError, "subsequence length: #{length} < 0"                                              if length < 0
     raise SeqError, "subsequence start + length > Seq.length: #{start} + #{length} > #{self.length}" if start + length > self.length
 
-
     if length == 0
       seq  = ""
       qual = "" unless self.qual.nil?
@@ -335,8 +334,7 @@ class Seq
       qual = self.qual[start .. stop] unless self.qual.nil?
     end
 
-
-    Seq.new(self.seq_name, seq, self.type, qual)
+    Seq.new(self.seq_name.dup, seq, self.type, qual)
   end
 
   # Method that replaces a sequence with a subsequence from a given start position
@@ -508,6 +506,45 @@ class Seq
     na_qual -= SCORE_BASE
     na_qual.mean
   end
+
+  # Method to find open reading frames (ORFs).
+  def each_orf(size_min, size_max, start_codons, stop_codons, pick_longest = false)
+    orfs    = []
+    pos_beg = 0
+
+    regex_start = Regexp.new(start_codons.join('|'))
+    regex_stop  = Regexp.new(stop_codons.join('|'))
+
+    while pos_beg and pos_beg < self.length - size_min
+      if pos_beg = self.seq.index(regex_start, pos_beg)
+        if pos_end = self.seq.index(regex_stop, pos_beg)
+          length = (pos_end - pos_beg) + 3
+
+          if size_min <= length and length <= size_max
+            subseq = self.subseq(pos_beg, length)
+
+            orfs << [subseq, pos_beg, pos_end + 3]
+          end
+        end
+
+        pos_beg += 1
+      end
+    end
+
+    if pick_longest
+      orf_hash = {}
+
+      orfs.each { |orf| orf_hash[orf.last] = orf }
+
+      orfs = orf_hash.values
+    end
+
+    if block_given?
+      orfs.each { |orf| yield orf }
+    else
+      return orfs
+    end
+  end
 end
 
 __END__