From: martinahansen Date: Wed, 5 Dec 2012 09:32:46 +0000 (+0000) Subject: refactor match->patmatch scan->patscan X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=54643d6bd6acb5c019d40ddbf7394ce0cdcfdba7;p=biopieces.git refactor match->patmatch scan->patscan git-svn-id: http://biopieces.googlecode.com/svn/trunk@2028 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/code_ruby/lib/maasha/seq/backtrack.rb b/code_ruby/lib/maasha/seq/backtrack.rb index 948d68b..1d09c30 100644 --- a/code_ruby/lib/maasha/seq/backtrack.rb +++ b/code_ruby/lib/maasha/seq/backtrack.rb @@ -40,8 +40,8 @@ module BackTrack MAX_INS = 5 # Maximum number of insertions allowed MAX_DEL = 5 # Maximum number of deletions allowed - def patmatch(pattern, offset = 0, max_mismatches = 0, max_insertions = 0, max_deletions = 0) - self.patscan(pattern, offset, max_mismatches, max_insertions, max_deletions) do |m| + def patmatch(pattern, start = 0, max_mismatches = 0, max_insertions = 0, max_deletions = 0) + self.patscan(pattern, start, max_mismatches, max_insertions, max_deletions) do |m| if block_given? yield m else @@ -59,39 +59,47 @@ module BackTrack end # ------------------------------------------------------------------------------ - # str.scan(pattern[, max_mismatches[, max_insertions[, max_deletions]]]) + # str.patscan(pattern[, start|[start, stop][, max_mismatches[, max_insertions[, max_deletions]]]]) # -> Array - # str.scan(pattern[, max_mismatches[, max_insertions[, max_deletions]]]) { |match| + # str.patscan(pattern[, start|[start, stop][, max_mismatches[, max_insertions[, max_deletions]]]]) { |match| # block # } # -> Match # # ------------------------------------------------------------------------------ - # Method to iterate through a sequence to locate pattern matches starting at a - # given offset allowing for a maximum number of mismatches, insertions, and - # deletions. Insertions are nucleotides found in the pattern but not in the - # sequence.
Deletions are nucleotides found in the sequence but not in the - # pattern. Matches found in block context return the Match object. Otherwise - # matches are returned in an Array of Match objects. - def patscan(pattern, offset = 0, max_mismatches = 0, max_insertions = 0, max_deletions = 0) + # Method to iterate through a sequence from a given start position to the end of + # the sequence or to a given stop position to locate a pattern allowing for a + # maximum number of mismatches, insertions, and deletions. Insertions are + # nucleotides found in the pattern but not in the sequence. Deletions are + # nucleotides found in the sequence but not in the pattern. Matches found in + # block context return the Match object. Otherwise matches are returned in an + # Array of Match objects. + def patscan(pattern, start = 0, max_mismatches = 0, max_insertions = 0, max_deletions = 0) + if start.is_a? Array + start, stop = *start + else + stop = self.length - 1 + end + raise BackTrackError, "Bad pattern: #{pattern}" unless pattern.downcase =~ OK_PATTERN - raise BackTrackError, "offset: #{offset} out of range (0 ... #{self.length})" unless (0 ... self.length).include? offset - raise BackTrackError, "max_mismatches: #{max_mismatches} out of range (0 .. #{MAX_MIS})" unless (0 .. MAX_MIS).include? max_mismatches - raise BackTrackError, "max_insertions: #{max_insertions} out of range (0 .. #{MAX_INS})" unless (0 .. MAX_INS).include? max_insertions - raise BackTrackError, "max_deletions: #{max_deletions} out of range (0 .. #{MAX_DEL})" unless (0 .. MAX_DEL).include? max_deletions +# raise BackTrackError, "start: #{start} out of range (0 .. #{self.length - 1})" unless (0 .. self.length).include? start +# raise BackTrackError, "stop: #{stop} out of range (0 .. #{self.length - 1})" unless (0 .. self.length).include? stop +# raise BackTrackError, "max_mismatches: #{max_mismatches} out of range (0 .. #{MAX_MIS})" unless (0 .. MAX_MIS).include? 
max_mismatches +# raise BackTrackError, "max_insertions: #{max_insertions} out of range (0 .. #{MAX_INS})" unless (0 .. MAX_INS).include? max_insertions +# raise BackTrackError, "max_deletions: #{max_deletions} out of range (0 .. #{MAX_DEL})" unless (0 .. MAX_DEL).include? max_deletions matches = [] - if block_given? - while result = scan_C(self.seq, self.length, pattern, offset, max_mismatches, max_insertions, max_deletions) - yield Match.new(result.first, result.last, self.seq[result.first ... result.first + result.last]) - offset = result.first + 1 - end - else - while result = scan_C(self.seq, self.length, pattern, offset, max_mismatches, max_insertions, max_deletions) - matches << Match.new(result.first, result.last, self.seq[result.first ... result.first + result.last]) - offset = result.first + 1 + while result = scan_C(self.seq, pattern, start, stop, max_mismatches, max_insertions, max_deletions) + match = Match.new(result.first, result.last, self.seq[result.first ... result.first + result.last]) + + if block_given? + yield match + else + matches << match end + + start = result.first + 1 end return matches.empty? ? nil : matches unless block_given? @@ -165,31 +173,31 @@ module BackTrack # insertions and del deletions. 
builder.c %{ VALUE scan_C( - VALUE _s, // Sequence - VALUE _len, // Sequence length - VALUE _p, // Pattern - VALUE _pos, // Start position - VALUE _mis, // Maximum mismatches - VALUE _ins, // Maximum insertions - VALUE _del // Maximum deletions + VALUE _s, // Sequence + VALUE _p, // Pattern + VALUE _start, // Search position start + VALUE _stop, // Search position stop + VALUE _mis, // Maximum mismatches + VALUE _ins, // Maximum insertions + VALUE _del // Maximum deletions ) { - char *s = StringValuePtr(_s); - char *p = StringValuePtr(_p); - unsigned int len = FIX2UINT(_len); - unsigned int pos = FIX2UINT(_pos); - unsigned int mis = FIX2UINT(_mis); - unsigned int ins = FIX2UINT(_ins); - unsigned int del = FIX2UINT(_del); + char *s = StringValuePtr(_s); + char *p = StringValuePtr(_p); + unsigned int start = FIX2UINT(_start); + unsigned int stop = FIX2UINT(_stop); + unsigned int mis = FIX2UINT(_mis); + unsigned int ins = FIX2UINT(_ins); + unsigned int del = FIX2UINT(_del); char *ss = s; int state = 0; unsigned int e = 0; VALUE tuple; - if (pos < len) + if (start <= stop) { - s += pos; + s += start; while (*s) { diff --git a/code_ruby/lib/maasha/seq/patternmatcher.rb b/code_ruby/lib/maasha/seq/patternmatcher.rb index 0641890..cd0d90a 100644 --- a/code_ruby/lib/maasha/seq/patternmatcher.rb +++ b/code_ruby/lib/maasha/seq/patternmatcher.rb @@ -32,26 +32,39 @@ require 'inline' # Inspired by the paper by Bruno Woltzenlogel Paleo (page 197): # http://www.logic.at/people/bruno/Papers/2007-GATE-ESSLLI.pdf module PatternMatcher - def match(pattern, pos = 0, max_edit_distance = 0) - self.scan(pattern, pos, max_edit_distance) do |m| + # ------------------------------------------------------------------------------ + # str.patmatch(pattern[, pos[, max_edit_distance]]) + # -> Match or nil + # str.patscan(pattern[, pos[, max_edit_distance]]) { |match| + # block + # } + # -> Match + # + # ------------------------------------------------------------------------------ + # Method to
iterate through a sequence to locate pattern matches starting from a + # given position and allowing for a maximum edit distance. Matches found in + # block context return the Match object. Otherwise matches are returned in an + # Array. + def patmatch(pattern, pos = 0, max_edit_distance = 0) + self.patscan(pattern, pos, max_edit_distance) do |m| return m end end # ------------------------------------------------------------------------------ - # str.scan(pattern[, pos[, max_edit_distance]]) - # -> Array - # str.scan(pattern[, pos[, max_edit_distance]]) { |match| + # str.patscan(pattern[, pos[, max_edit_distance]]) + # -> Array or nil + # str.patscan(pattern[, pos[, max_edit_distance]]) { |match| # block # } # -> Match # # ------------------------------------------------------------------------------ - # Method to iterate through a sequence to locate pattern matches starting - # from a given position and allowing for a maximum edit distance. - # Matches found in block context return the Match object. Otherwise matches are - # returned in an Array. - def scan(pattern, pos = 0, max_edit_distance = 0) + # Method to iterate through a sequence to locate pattern matches starting from a + # given position and allowing for a maximum edit distance. Matches found in + # block context return the Match object. Otherwise matches are returned in an + # Array. + def patscan(pattern, pos = 0, max_edit_distance = 0) matches = [] while result = match_C(self.seq, self.length, pattern, pattern.length, pos, max_edit_distance)