raise SeqError, "subsequence length: #{length} < 0" if length < 0
raise SeqError, "subsequence start + length > Seq.length: #{start} + #{length} > #{self.length}" if start + length > self.length
-
if length == 0
seq = ""
qual = "" unless self.qual.nil?
qual = self.qual[start .. stop] unless self.qual.nil?
end
-
- Seq.new(self.seq_name, seq, self.type, qual)
+ Seq.new(self.seq_name.dup, seq, self.type, qual)
end
# Method that replaces a sequence with a subsequence from a given start position
na_qual -= SCORE_BASE
na_qual.mean
end
+
# Method to find open reading frames (ORFs).
#
# The sequence is scanned left to right. For every start codon match, the
# first stop codon match at or after that position is located, and the span
# from the start codon through the stop codon is kept when its length lies
# within size_min..size_max (both inclusive).
#
# NOTE: the stop codon search is a plain leftmost match; it is not restricted
# to the reading frame of the start codon.
#
# size_min     - minimum ORF length, start and stop codons included.
# size_max     - maximum ORF length, start and stop codons included.
# start_codons - Array of start codon patterns (joined with '|' into a regex).
# stop_codons  - Array of stop codon patterns (joined with '|' into a regex).
# pick_longest - when true, keep only the longest ORF per end position.
#
# Each ORF is an Array [subseq, pos_beg, pos_end] where pos_end is the stop
# codon match position + 3 (i.e. stop codons are assumed to be 3 long).
#
# Yields each ORF if a block is given, otherwise returns the Array of ORFs.
def each_orf(size_min, size_max, start_codons, stop_codons, pick_longest = false)
  orfs    = []
  pos_beg = 0

  regex_start = Regexp.new(start_codons.join('|'))
  regex_stop  = Regexp.new(stop_codons.join('|'))

  while pos_beg && pos_beg < self.length - size_min
    pos_beg = self.seq.index(regex_start, pos_beg)

    # No further start codon in the sequence - nothing left to scan.
    break unless pos_beg

    if (pos_end = self.seq.index(regex_stop, pos_beg))
      orf_len = (pos_end - pos_beg) + 3

      if size_min <= orf_len && orf_len <= size_max
        orfs << [self.subseq(pos_beg, orf_len), pos_beg, pos_end + 3]
      end
    end

    # Step past the current start codon so the next search finds a new one.
    pos_beg += 1
  end

  # ORFs are collected in ascending start order, so among ORFs sharing an end
  # position the first one seen is the longest. Keep that one (the previous
  # implementation overwrote it and ended up with the shortest).
  orfs = orfs.uniq { |orf| orf.last } if pick_longest

  return orfs unless block_given?

  orfs.each { |orf| yield orf }
end
end
__END__