# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-require 'inline'
+require 'inline' # RubyInline
# Exception type raised for every error that concerns BackTrack.
class BackTrackError < StandardError
end
# Module containing code to locate nucleotide patterns in sequences allowing for
# ambiguity codes and a given maximum mismatches, insertions, and deletions. The
# pattern match engine is based on a backtrack algorithm.
+# Insertions are nucleotides found in the pattern but not in the sequence.
+# Deletions are nucleotides found in the sequence but not in the pattern.
+# Algorithm based on code kindly provided by j_random_hacker @ Stackoverflow.
module BackTrack
OK_PATTERN = Regexp.new('^[flsycwphqrimtnkvadegu]+$')
+ MAX_MIS = 5 # Maximum number of mismatches allowed
+ MAX_INS = 5 # Maximum number of insertions allowed
+ MAX_DEL = 5 # Maximum number of deletions allowed
# ------------------------------------------------------------------------------
# str.patscan(pattern[, offset[, max_mismatches[, max_insertions[, max_deletions]]]])
# ------------------------------------------------------------------------------
# Method to iterate through a sequence to locate pattern matches starting at a
# given offset allowing for a maximum number of mismatches, insertions, and
- # deletions. Matches found in block context return the Match object. Otherwise
+ # deletions. Insertions are nucleotides found in the pattern but not in the
+ # sequence. Deletions are nucleotides found in the sequence but not in the
+ # pattern. Matches found in block context return the Match object. Otherwise
# matches are returned in an Array of Match objects.
def patscan(pattern, offset = 0, max_mismatches = 0, max_insertions = 0, max_deletions = 0)
raise BackTrackError, "Bad pattern: #{pattern}" unless pattern.downcase =~ OK_PATTERN
+ raise BackTrackError, "offset: #{offset} out of range (0 ... #{self.length - 1})" unless (0 ... self.length).include? offset
+ raise BackTrackError, "max_mismatches: #{max_mismatches} out of range (0 .. #{MAX_MIS})" unless (0 .. MAX_MIS).include? max_mismatches
+ raise BackTrackError, "max_insertions: #{max_insertions} out of range (0 .. #{MAX_INS})" unless (0 .. MAX_INS).include? max_insertions
+ raise BackTrackError, "max_deletions: #{max_deletions} out of range (0 .. #{MAX_DEL})" unless (0 .. MAX_DEL).include? max_deletions
+
matches = []
if block_given?
else
{
if (mm && *s && *p && (r = backtrack(ss, s + 1, p + 1, mm - 1, ins, del))) return r;
- if (ins && *s && (r = backtrack(ss, s + 1, p, mm, ins - 1, del))) return r;
- if (del && *p && (r = backtrack(ss, s, p + 1, mm, ins, del - 1))) return r;
+ if (ins && *p && (r = backtrack(ss, s, p + 1, mm, ins - 1, del))) return r;
+ if (del && *s && (r = backtrack(ss, s + 1, p, mm, ins, del - 1))) return r;
}
return 0;