require 'maasha/seq/trim'
require 'narray'
-autoload :BackTrack, 'maasha/seq/backtrack.rb'
-autoload :Dynamic, 'maasha/seq/dynamic.rb'
-autoload :Homopolymer, 'maasha/seq/homopolymer.rb'
+autoload :BackTrack, 'maasha/seq/backtrack'
+autoload :Dynamic, 'maasha/seq/dynamic'
+autoload :Homopolymer, 'maasha/seq/homopolymer'
+autoload :Hamming, 'maasha/seq/hamming'
+autoload :Levenshtein, 'maasha/seq/levenshtein'
+autoload :Ambiguity, 'maasha/seq/ambiguity'
# Residue alphabets
# Lowercase DNA residues, one single-character string per base.
DNA = ['a', 't', 'c', 'g']
"GTG" => "V", "GCG" => "A", "GAG" => "E", "GGG" => "G"
}
-
# Exception type raised for every error condition related to Seq.
# Inherits from StandardError, so a bare `rescue` will catch it.
class SeqError < StandardError
end
type = record[:SEQ_TYPE].to_sym if record[:SEQ_TYPE]
qual = record[:SCORES]
- self.new(seq_name, seq, type, qual)
+ self.new(seq_name: seq_name, seq: seq, type: type, qual: qual)
end
# Class method that generates all possible oligos of a specified length and type.
oligos
end
- # Initialize a sequence object with the following arguments:
- # - seq_name: Name of the sequence.
- # - seq: The sequence.
- # - type: The sequence type - DNA, RNA, or protein
- # - qual: An Illumina type quality scores string.
- def initialize(seq_name = nil, seq = nil, type = nil, qual = nil)
- @seq_name = seq_name
- @seq = seq
- @type = type
- @qual = qual
+ # Initialize a sequence object with the following options:
+ # - :seq_name Name of the sequence.
+ # - :seq The sequence.
+ # - :type The sequence type - DNA, RNA, or protein
+ # - :qual An Illumina type quality scores string.
# Raises SeqError when both :seq and :qual are given and their lengths differ.
+ def initialize(options = {})
# Each attribute is read with options[...]; with a plain Hash, an option
# that is not supplied leaves the corresponding instance variable nil.
+ @seq_name = options[:seq_name]
+ @seq = options[:seq]
+ @type = options[:type]
+ @qual = options[:qual]
+
# The consistency check only runs when both @seq and @qual are set, so an
# entry may still be created without a sequence or without scores.
+ if @seq and @qual and @seq.length != @qual.length
+ raise SeqError, "Sequence length and score length mismatch: #{@seq.length} != #{@qual.length}"
+ end
end
# Method that guesses and returns the sequence type
raise SeqError, "Missing seq_name" if self.seq_name.nil? or self.seq_name == ''
raise SeqError, "Missing seq" if self.seq.nil? or self.seq.empty?
- seq_name = self.seq_name.to_s
- seq = self.seq.to_s
+ seq_name = self.seq_name
+ seq = self.seq.dup
unless wrap.nil?
seq.gsub!(/(.{#{wrap}})/) do |match|
# Method to reverse the sequence.
# Returns a new Seq built from self.seq.reverse. seq_name and type are passed
# through as they are; qual is reversed as well when it is set, otherwise the
# missing qual is handed on unchanged.
def reverse
- Seq.new(self.seq_name, self.seq.reverse, self.type, self.qual ? self.qual.reverse : self.qual)
+ entry = Seq.new(
+ seq_name: self.seq_name,
+ seq: self.seq.reverse,
+ type: self.type,
+ qual: (self.qual ? self.qual.reverse : self.qual)
+ )
+
+ entry
end
# Method to complement the sequence.
def complement
raise SeqError, "Cannot complement 0 length sequence" if self.length == 0
- entry = Seq.new
- entry.seq_name = self.seq_name
- entry.type = self.type
- entry.qual = self.qual
+ entry = Seq.new(
+ seq_name: self.seq_name,
+ type: self.type,
+ qual: self.qual
+ )
if self.is_dna?
entry.seq = self.seq.tr('AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn')
# Method to determine the Hamming Distance between
# two Sequence objects (case insensitive).
# - entry: the object to compare against; only entry.seq is read.
# - options: optional hash. When options[:ambiguity] is truthy the distance
#   is computed by Hamming.distance on the two sequences; otherwise both
#   sequences are upcased and compared with hamming_distance.
# NOTE(review): the ambiguity branch passes the sequences without upcasing,
# so case insensitivity there depends on Hamming.distance - confirm.
- def hamming_distance(seq)
- self.seq.upcase.hamming_distance(seq.seq.upcase)
+ def hamming_distance(entry, options = nil)
+ if options and options[:ambiguity]
+ Hamming.distance(self.seq, entry.seq)
+ else
+ self.seq.upcase.hamming_distance(entry.seq.upcase)
+ end
+ end
+
+ # Method to determine the Edit Distance between
+ # two Sequence objects (case insensitive).
# NOTE(review): the sequences are handed to Levenshtein.distance as they are;
# whether the comparison is case insensitive depends on that implementation -
# confirm.
+ def edit_distance(entry)
+ Levenshtein.distance(self.seq, entry.seq)
end
# Method that generates a random sequence of a given length and type.
# Method to return a new Seq object with shuffled sequence.
# self.seq is split into single characters, shuffled and joined again;
# seq_name, type and qual are passed to the new object as they are.
# NOTE(review): qual is not shuffled together with seq, so the scores in the
# returned object keep their original order - confirm this is intended.
def shuffle
- Seq.new(self.seq_name, self.seq.split('').shuffle!.join, self.type, self.qual)
+ Seq.new(
+ seq_name: self.seq_name,
+ seq: self.seq.split('').shuffle!.join,
+ type: self.type,
+ qual: self.qual
+ )
end
# Method to shuffle a sequence randomly inline.
seq_name = self.seq_name.nil? ? nil : self.seq_name.dup
- Seq.new(seq_name, seq, self.type, qual)
+ Seq.new(seq_name: seq_name, seq: seq, type: self.type, qual: qual)
end
# Method that replaces a sequence with a subsequence from a given start position
regex_start = Regexp.new(start_codons.join('|'), true)
regex_stop = Regexp.new(stop_codons.join('|'), true)
- while pos_beg and pos_beg < self.length - size_min
- if pos_beg = self.seq.index(regex_start, pos_beg)
- if pos_end = self.seq.index(regex_stop, pos_beg)
- length = (pos_end - pos_beg) + 3
+ while pos_beg = self.seq.index(regex_start, pos_beg)
+ pos_end = pos_beg + 3
+
+ while pos_end = self.seq.index(regex_stop, pos_end)
+ length = (pos_end - pos_beg) + 3
- if (length % 3) == 0
- if size_min <= length and length <= size_max
- subseq = self.subseq(pos_beg, length)
+ if (length % 3) == 0
+ if size_min <= length and length <= size_max
+ subseq = self.subseq(pos_beg, length)
- orfs << [subseq, pos_beg, pos_end + 3]
- end
+ orfs << [subseq, pos_beg, pos_end + 3]
end
+
+ break
end
- pos_beg += 1
+ pos_end += 1
end
+
+ pos_beg += 1
end
if pick_longest