From: martinahansen Date: Thu, 17 Nov 2011 07:28:58 +0000 (+0000) Subject: fixed ins/del reverse X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=f40fdc8f55be97c491ad5d2caba71bab5a970248;p=biopieces.git fixed ins/del reverse git-svn-id: http://biopieces.googlecode.com/svn/trunk@1655 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/code_ruby/lib/maasha/backtrack.rb b/code_ruby/lib/maasha/backtrack.rb index acc5b8c..02eb047 100644 --- a/code_ruby/lib/maasha/backtrack.rb +++ b/code_ruby/lib/maasha/backtrack.rb @@ -22,7 +22,7 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -require 'inline' +require 'inline' # RubyInline # Error class for all exceptions to do with BackTrack. class BackTrackError < StandardError; end @@ -30,8 +30,14 @@ class BackTrackError < StandardError; end # Module containing code to locate nucleotide patterns in sequences allowing for # ambiguity codes and a given maximum mismatches, insertions, and deletions. The # pattern match engine is based on a backtrack algorithm. +# Insertions are nucleotides found in the pattern but not in the sequence. +# Deletions are nucleotides found in the sequence but not in the pattern. +# Algorithm based on code kindly provided by j_random_hacker @ Stackoverflow. module BackTrack OK_PATTERN = Regexp.new('^[flsycwphqrimtnkvadegu]+$') + MAX_MIS = 5 # Maximum number of mismatches allowed + MAX_INS = 5 # Maximum number of insertions allowed + MAX_DEL = 5 # Maximum number of deletions allowed # ------------------------------------------------------------------------------ # str.scan(pattern[, max_mismatches [, max_insertions [,max_deletions]]]) @@ -44,10 +50,17 @@ module BackTrack # ------------------------------------------------------------------------------ # Method to iterate through a sequence to locate pattern matches starting at a # given offset allowing for a maximum number of mismatches, insertions, and - # deletions. Matches found in block context return the Match object. Otherwise + # deletions. Insertions are nucleotides found in the pattern but not in the + # sequence. Deletions are nucleotides found in the sequence but not in the + # pattern. Matches found in block context return the Match object. Otherwise # matches are returned in an Array of Match objects. def patscan(pattern, offset = 0, max_mismatches = 0, max_insertions = 0, max_deletions = 0) raise BackTrackError, "Bad pattern: #{pattern}" unless pattern.downcase =~ OK_PATTERN + raise BackTrackError, "offset: #{offset} out of range (0 ... #{self.length - 1})" unless (0 ... self.length).include? offset + raise BackTrackError, "max_mismatches: #{max_mismatches} out of range (0 .. #{MAX_MIS})" unless (0 .. MAX_MIS).include? max_mismatches + raise BackTrackError, "max_insertions: #{max_insertions} out of range (0 .. #{MAX_INS})" unless (0 .. MAX_INS).include? max_insertions + raise BackTrackError, "max_deletions: #{max_deletions} out of range (0 .. #{MAX_DEL})" unless (0 .. MAX_DEL).include? max_deletions + matches = [] if block_given? @@ -108,8 +121,8 @@ module BackTrack else { if (mm && *s && *p && (r = backtrack(ss, s + 1, p + 1, mm - 1, ins, del))) return r; - if (ins && *s && (r = backtrack(ss, s + 1, p, mm, ins - 1, del))) return r; - if (del && *p && (r = backtrack(ss, s, p + 1, mm, ins, del - 1))) return r; + if (ins && *p && (r = backtrack(ss, s, p + 1, mm, ins - 1, del))) return r; + if (del && *s && (r = backtrack(ss, s + 1, p, mm, ins, del - 1))) return r; } return 0;