require 'maasha/seq/trim'
require 'narray'
-autoload :BackTrack, 'maasha/seq/backtrack.rb'
-autoload :Dynamic, 'maasha/seq/dynamic.rb'
+autoload :BackTrack, 'maasha/seq/backtrack'
+autoload :Dynamic, 'maasha/seq/dynamic'
+autoload :Homopolymer, 'maasha/seq/homopolymer'
+autoload :Hamming, 'maasha/seq/hamming'
+autoload :Levenshtein, 'maasha/seq/levenshtein'
+autoload :Ambiguity, 'maasha/seq/ambiguity'
# Residue alphabets
DNA = %w[a t c g]
"GTG" => "V", "GCG" => "A", "GAG" => "E", "GGG" => "G"
}
-
# Error class for all exceptions to do with Seq.
class SeqError < StandardError; end
type = record[:SEQ_TYPE].to_sym if record[:SEQ_TYPE]
qual = record[:SCORES]
- self.new(seq_name, seq, type, qual)
+ self.new(seq_name: seq_name, seq: seq, type: type, qual: qual)
end
# Class method that generates all possible oligos of a specified length and type.
oligos
end
- # Initialize a sequence object with the following arguments:
- # - seq_name: Name of the sequence.
- # - seq: The sequence.
- # - type: The sequence type - DNA, RNA, or protein
- # - qual: An Illumina type quality scores string.
- def initialize(seq_name = nil, seq = nil, type = nil, qual = nil)
- @seq_name = seq_name
- @seq = seq
- @type = type
- @qual = qual
+ # Initialize a sequence object with the following options:
+ # - :seq_name Name of the sequence.
+ # - :seq The sequence.
+ # - :type The sequence type - DNA, RNA, or protein
+ # - :qual An Illumina type quality scores string.
+ #
+ # Raises SeqError if both :seq and :qual are given and their lengths differ.
+ def initialize(options = {})
+ @seq_name = options[:seq_name]
+ @seq = options[:seq]
+ @type = options[:type]
+ @qual = options[:qual]
+
+ # :seq may be absent while :qual is set (complement creates the entry with
+ # qual only and assigns seq afterwards), so the lengths are only compared
+ # when both values are present; otherwise nil.length would raise NoMethodError.
+ if @seq and @qual and @seq.length != @qual.length
+ raise SeqError, "Sequence length and score length mismatch: #{@seq.length} != #{@qual.length}"
+ end
end
# Method that guesses and returns the sequence type
# Method to reverse the sequence.
def reverse
- Seq.new(self.seq_name, self.seq.reverse, self.type, self.qual ? self.qual.reverse : self.qual)
+ # The quality string is reversed along with the residues; when there is
+ # no quality string the value is passed through unchanged.
+ reversed_qual = self.qual ? self.qual.reverse : self.qual
+
+ Seq.new(seq_name: self.seq_name, seq: self.seq.reverse, type: self.type, qual: reversed_qual)
end
# Method to complement the sequence.
def complement
raise SeqError, "Cannot complement 0 length sequence" if self.length == 0
- entry = Seq.new
- entry.seq_name = self.seq_name
- entry.type = self.type
- entry.qual = self.qual
+ entry = Seq.new(
+ seq_name: self.seq_name,
+ type: self.type,
+ qual: self.qual
+ )
if self.is_dna?
entry.seq = self.seq.tr('AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn')
# Method to determine the Hamming Distance between
# two Sequence objects (case insensitive).
- def hamming_distance(seq)
- self.seq.upcase.hamming_distance(seq.seq.upcase)
+ # When options[:ambiguity] is set the comparison is delegated to
+ # Hamming.distance; otherwise both sequences are upcased and compared
+ # with the hamming_distance method of the upcased sequence.
+ def hamming_distance(entry, options = nil)
+ return Hamming.distance(self.seq, entry.seq) if options and options[:ambiguity]
+
+ self.seq.upcase.hamming_distance(entry.seq.upcase)
+ end
+
+ # Method to determine the Edit Distance between
+ # two Sequence objects using Levenshtein.distance.
+ # NOTE(review): the sequences are passed as-is (no upcase here); whether the
+ # comparison is case insensitive depends on Levenshtein.distance - confirm.
+ def edit_distance(entry)
+ Levenshtein.distance(self.seq, entry.seq)
end
# Method that generates a random sequence of a given length and type.
# Method to return a new Seq object with shuffled sequence.
def shuffle
- Seq.new(self.seq_name, self.seq.split('').shuffle!.join, self.type, self.qual)
+ # Only the residues are permuted; name, type and quality string are
+ # handed to the new entry unchanged.
+ shuffled = self.seq.split('').shuffle!.join
+
+ Seq.new(seq_name: self.seq_name, seq: shuffled, type: self.type, qual: self.qual)
end
# Method to shuffle a sequence randomly inline.
self
end
+ # Method to add two Seq objects, returning a new Seq holding the
+ # concatenated sequence. The type is set only when both entries have the
+ # same type, and the quality strings are joined only when both entries
+ # have one. No seq_name is assigned to the new entry.
+ def +(entry)
+ sum = Seq.new
+ sum.seq = self.seq + entry.seq
+ sum.type = self.type if self.type == entry.type
+ sum.qual = self.qual + entry.qual if self.qual && entry.qual
+ sum
+ end
+
# Method to concatenate sequence entries.
def <<(entry)
raise SeqError, "sequences of different types" unless self.type == entry.type
seq_name = self.seq_name.nil? ? nil : self.seq_name.dup
- Seq.new(seq_name, seq, self.type, qual)
+ Seq.new(seq_name: seq_name, seq: seq, type: self.type, qual: qual)
end
# Method that replaces a sequence with a subsequence from a given start position
comp
end
- # Method that returns the length of the longest homopolymeric stretch
- # found in a sequence.
- def homopol_max(min = 1)
- return 0 if self.seq.nil? or self.seq.empty?
-
- found = false
-
- self.seq.upcase.scan(/A{#{min},}|T{#{min},}|G{#{min},}|C{#{min},}|N{#{min},}/) do |match|
- found = true
- min = match.size > min ? match.size : min
- end
-
- return 0 unless found
-
- min
- end
-
# Method that returns the percentage of hard masked residues
# or N's in a sequence.
def hard_mask