MAX_INS = 5 # Maximum number of insertions allowed
MAX_DEL = 5 # Maximum number of deletions allowed
- def patmatch(pattern, offset = 0, max_mismatches = 0, max_insertions = 0, max_deletions = 0)
- self.patscan(pattern, offset, max_mismatches, max_insertions, max_deletions) do |m|
+ def patmatch(pattern, start = 0, max_mismatches = 0, max_insertions = 0, max_deletions = 0)
+ self.patscan(pattern, start, max_mismatches, max_insertions, max_deletions) do |m|
if block_given?
yield m
else
end
# ------------------------------------------------------------------------------
- # str.scan(pattern[, max_mismatches[, max_insertions[, max_deletions]]])
+ # str.patscan(pattern[, start|[start, stop][, max_mismatches[, max_insertions[, max_deletions]]]])
# -> Array
- # str.scan(pattern[, max_mismatches[, max_insertions[, max_deletions]]]) { |match|
+ # str.patscan(pattern[, start|[start, stop][, max_mismatches[, max_insertions[, max_deletions]]]]) { |match|
# block
# }
# -> Match
#
# ------------------------------------------------------------------------------
- # Method to iterate through a sequence to locate pattern matches starting at a
- # given offset allowing for a maximum number of mismatches, insertions, and
- # deletions. Insertions are nucleotides found in the pattern but not in the
- # sequence. Deletions are nucleotides found in the sequence but not in the
- # pattern. Matches found in block context return the Match object. Otherwise
- # matches are returned in an Array of Match objects.
- def patscan(pattern, offset = 0, max_mismatches = 0, max_insertions = 0, max_deletions = 0)
+ # Method to iterate through a sequence from a given start position to the end of
+ # the sequence or to a given stop position to locate a pattern allowing for a
+ # maximum number of mismatches, insertions, and deletions. Insertions are
+ # nucleotides found in the pattern but not in the sequence. Deletions are
+ # nucleotides found in the sequence but not in the pattern. Matches found in
+ # block context return the Match object. Otherwise matches are returned in an
+ # Array of Match objects.
+ def patscan(pattern, start = 0, max_mismatches = 0, max_insertions = 0, max_deletions = 0)
+ if start.is_a? Array
+ start, stop = *start
+ else
+ stop = self.length - 1
+ end
+
raise BackTrackError, "Bad pattern: #{pattern}" unless pattern.downcase =~ OK_PATTERN
- raise BackTrackError, "offset: #{offset} out of range (0 ... #{self.length})" unless (0 ... self.length).include? offset
- raise BackTrackError, "max_mismatches: #{max_mismatches} out of range (0 .. #{MAX_MIS})" unless (0 .. MAX_MIS).include? max_mismatches
- raise BackTrackError, "max_insertions: #{max_insertions} out of range (0 .. #{MAX_INS})" unless (0 .. MAX_INS).include? max_insertions
- raise BackTrackError, "max_deletions: #{max_deletions} out of range (0 .. #{MAX_DEL})" unless (0 .. MAX_DEL).include? max_deletions
+# raise BackTrackError, "start: #{start} out of range (0 .. #{self.length - 1})" unless (0 .. self.length).include? start
+# raise BackTrackError, "stop: #{stop} out of range (0 .. #{self.length - 1})" unless (0 .. self.length).include? stop
+# raise BackTrackError, "max_mismatches: #{max_mismatches} out of range (0 .. #{MAX_MIS})" unless (0 .. MAX_MIS).include? max_mismatches
+# raise BackTrackError, "max_insertions: #{max_insertions} out of range (0 .. #{MAX_INS})" unless (0 .. MAX_INS).include? max_insertions
+# raise BackTrackError, "max_deletions: #{max_deletions} out of range (0 .. #{MAX_DEL})" unless (0 .. MAX_DEL).include? max_deletions
matches = []
- if block_given?
- while result = scan_C(self.seq, self.length, pattern, offset, max_mismatches, max_insertions, max_deletions)
- yield Match.new(result.first, result.last, self.seq[result.first ... result.first + result.last])
- offset = result.first + 1
- end
- else
- while result = scan_C(self.seq, self.length, pattern, offset, max_mismatches, max_insertions, max_deletions)
- matches << Match.new(result.first, result.last, self.seq[result.first ... result.first + result.last])
- offset = result.first + 1
+ while result = scan_C(self.seq, pattern, start, stop, max_mismatches, max_insertions, max_deletions)
+ match = Match.new(result.first, result.last, self.seq[result.first ... result.first + result.last])
+
+ if block_given?
+ yield match
+ else
+ matches << match
end
+
+ start = result.first + 1
end
return matches.empty? ? nil : matches unless block_given?
# insertions and del deletions.
builder.c %{
VALUE scan_C(
- VALUE _s, // Sequence
- VALUE _len, // Sequence length
- VALUE _p, // Pattern
- VALUE _pos, // Start position
- VALUE _mis, // Maximum mismatches
- VALUE _ins, // Maximum insertions
- VALUE _del // Maximum deletions
+ VALUE _s, // Sequence
+ VALUE _p, // Pattern
+ VALUE _start, // Search position start
+ VALUE _stop, // Search position stop
+ VALUE _mis, // Maximum mismatches
+ VALUE _ins, // Maximum insertions
+ VALUE _del // Maximum deletions
)
{
- char *s = StringValuePtr(_s);
- char *p = StringValuePtr(_p);
- unsigned int len = FIX2UINT(_len);
- unsigned int pos = FIX2UINT(_pos);
- unsigned int mis = FIX2UINT(_mis);
- unsigned int ins = FIX2UINT(_ins);
- unsigned int del = FIX2UINT(_del);
+ char *s = StringValuePtr(_s);
+ char *p = StringValuePtr(_p);
+ unsigned int start = FIX2UINT(_start);
+ unsigned int stop = FIX2UINT(_stop);
+ unsigned int mis = FIX2UINT(_mis);
+ unsigned int ins = FIX2UINT(_ins);
+ unsigned int del = FIX2UINT(_del);
char *ss = s;
int state = 0;
unsigned int e = 0;
VALUE tuple;
- if (pos < len)
+ if (start <= stop)
{
- s += pos;
+ s += start;
while (*s)
{