From db7133fb8a5c6ed7dadf271912d66e2b9a7be207 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Thu, 24 Oct 2013 10:05:05 +0000 Subject: [PATCH] refactored backtrack methods to use options hash in method invocations git-svn-id: http://biopieces.googlecode.com/svn/trunk@2248 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/find_adaptor | 8 +-- bp_bin/remove_primers | 10 +++- code_ruby/lib/maasha/seq/backtrack.rb | 65 +++++++++++++++------ code_ruby/test/maasha/seq/test_backtrack.rb | 45 +++++++------- 4 files changed, 82 insertions(+), 46 deletions(-) diff --git a/bp_bin/find_adaptor b/bp_bin/find_adaptor index c4df418..2fd757a 100755 --- a/bp_bin/find_adaptor +++ b/bp_bin/find_adaptor @@ -92,7 +92,7 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| entry = Seq.new_bp(record) if options[:forward] and record[:SEQ].length >= options[:forward].length - if m = entry.patmatch(options[:forward], 0, fmis, fins, fdel) + if m = entry.patmatch(options[:forward], max_mismatches: fmis, max_insertions: fins, max_deletions: fdel) record[:ADAPTOR_POS_LEFT] = m.pos record[:ADAPTOR_LEN_LEFT] = m.length record[:ADAPTOR_PAT_LEFT] = m.match @@ -107,7 +107,7 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| pat = pat[1 ... pat.length] - if m = entry.patmatch(pat, [0, len], fmis, fins, fdel) + if m = entry.patmatch(pat, start: 0, stop: len, max_mismatches: fmis, max_insertions: fins, max_deletions: fdel) record[:ADAPTOR_POS_LEFT] = m.pos record[:ADAPTOR_LEN_LEFT] = m.length record[:ADAPTOR_PAT_LEFT] = m.match @@ -121,7 +121,7 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| end if options[:reverse] and record[:SEQ].length >= options[:reverse].length - if m = entry.patmatch(options[:reverse], 0, rmis, rins, rdel) + if m = entry.patmatch(options[:reverse], max_mismatches: rmis, max_insertions: rins, max_deletions: rdel) record[:ADAPTOR_POS_RIGHT] = m.pos record[:ADAPTOR_LEN_RIGHT] = m.length record[:ADAPTOR_PAT_RIGHT] = m.match @@ -136,7 +136,7 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| pat = pat[0 ... pat.length - 1] - if m = entry.patmatch(pat, entry.length - len, rmis, rins, rdel) + if m = entry.patmatch(pat, start: entry.length - len, max_mismatches: rmis, max_insertions: rins, max_deletions: rdel) record[:ADAPTOR_POS_RIGHT] = m.pos record[:ADAPTOR_LEN_RIGHT] = m.length record[:ADAPTOR_PAT_RIGHT] = m.match diff --git a/bp_bin/remove_primers b/bp_bin/remove_primers index 1f7b7b2..88c1a81 100755 --- a/bp_bin/remove_primers +++ b/bp_bin/remove_primers @@ -52,7 +52,10 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| reverse = false seq = Seq.new_bp(record) - seq.patscan(options[:forward].to_s, 0, options[:mismatches], options[:insertions], options[:deletions]) do |match| + seq.patscan(options[:forward].to_s, + max_mismatches: options[:mismatches], + max_insertions: options[:insertions], + max_deletions: options[:deletions]) do |match| record[:FORWARD_POS] = match.pos record[:FORWARD_LEN] = match.length pos = match.pos + match.length @@ -62,7 +65,10 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| break end - seq.patscan(options[:reverse].to_s, 0, options[:mismatches], options[:insertions], options[:deletions]) do |match| + seq.patscan(options[:reverse].to_s, + max_mismatches: options[:mismatches], + max_insertions: options[:insertions], + max_deletions: options[:deletions]) do |match| record[:REVERSE_POS] = match.pos record[:REVERSE_LEN] = match.length pos = 0 diff --git a/code_ruby/lib/maasha/seq/backtrack.rb b/code_ruby/lib/maasha/seq/backtrack.rb index 8a2fb12..7822524 100644 --- a/code_ruby/lib/maasha/seq/backtrack.rb +++ b/code_ruby/lib/maasha/seq/backtrack.rb @@ -44,21 +44,34 @@ module BackTrack MAX_DEL = 5 # Maximum number of deletions allowed # ------------------------------------------------------------------------------ - # str.patmatch(pattern[, start|[start, stop][, max_mis[, max_ins[, max_del]]]]) + # str.patmatch(pattern[, options]) # -> Match - # str.patmatch(pattern[, start|[start, stop][, max_mis[, max_ins[, max_del]]]]) { |match| + # str.patmatch(pattern[, options]) { |match| # block # } # -> Match # + # options: + # :start + # :stop + # :max_mismatches + # :max_insertions + # :max_deletions + # # ------------------------------------------------------------------------------ # Method to iterate through a sequence from a given start position to the end of # the sequence or to a given stop position to locate a pattern allowing for a # maximum number of mismatches, insertions, and deletions. Insertions are # nucleotides found in the pattern but not in the sequence. Deletions are # nucleotides found in the sequence but not in the pattern. - def patmatch(pattern, start = 0, max_mismatches = 0, max_insertions = 0, max_deletions = 0) - self.patscan(pattern, start, max_mismatches, max_insertions, max_deletions) do |m| + def patmatch(pattern, options = {}) + options[:start] ||= 0 + options[:stop] ||= self.length - 1 + options[:max_mismatches] ||= 0 + options[:max_insertions] ||= 0 + options[:max_deletions] ||= 0 + + self.patscan(pattern, options) do |m| if block_given? yield m else @@ -76,13 +89,20 @@ module BackTrack end # ------------------------------------------------------------------------------ - # str.patscan(pattern[, start|[start, stop][, max_mis[, max_ins[, max_del]]]]) + # str.patscan(pattern[, options]) # -> Array - # str.patscan(pattern[, start|[start, stop][, max_mis[, max_ins[, max_del]]]]) { |match| + # str.patscan(pattern[, options]) { |match| # block # } # -> Match # + # options: + # :start + # :stop + # :max_mismatches + # :max_insertions + # :max_deletions + # # ------------------------------------------------------------------------------ # Method to iterate through a sequence from a given start position to the end of # the sequence or to a given stop position to locate a pattern allowing for a @@ -91,23 +111,30 @@ module BackTrack # nucleotides found in the sequence but not in the pattern. Matches found in # block context return the Match object. Otherwise matches are returned in an # Array of Match objects. - def patscan(pattern, start = 0, max_mismatches = 0, max_insertions = 0, max_deletions = 0) - if start.is_a? Array - start, stop = *start - else - stop = self.length - 1 - end + def patscan(pattern, options = {}) + options[:start] ||= 0 + options[:stop] ||= self.length - 1 + options[:max_mismatches] ||= 0 + options[:max_insertions] ||= 0 + options[:max_deletions] ||= 0 raise BackTrackError, "Bad pattern: #{pattern}" unless pattern.downcase =~ OK_PATTERN - raise BackTrackError, "start: #{start} out of range (0 .. #{self.length - 1})" unless (0 ... self.length).include? start - raise BackTrackError, "stop: #{stop} out of range (0 .. #{self.length - 1})" unless (0 ... self.length).include? stop - raise BackTrackError, "max_mismatches: #{max_mismatches} out of range (0 .. #{MAX_MIS})" unless (0 .. MAX_MIS).include? max_mismatches - raise BackTrackError, "max_insertions: #{max_insertions} out of range (0 .. #{MAX_INS})" unless (0 .. MAX_INS).include? max_insertions - raise BackTrackError, "max_deletions: #{max_deletions} out of range (0 .. #{MAX_DEL})" unless (0 .. MAX_DEL).include? max_deletions + raise BackTrackError, "start: #{options[:start]} out of range (0 .. #{self.length - 1})" unless (0 ... self.length).include? options[:start] + raise BackTrackError, "stop: #{options[:stop]} out of range (0 .. #{self.length - 1})" unless (0 ... self.length).include? options[:stop] + raise BackTrackError, "max_mismatches: #{options[:max_mismatches]} out of range (0 .. #{MAX_MIS})" unless (0 .. MAX_MIS).include? options[:max_mismatches] + raise BackTrackError, "max_insertions: #{options[:max_insertions]} out of range (0 .. #{MAX_INS})" unless (0 .. MAX_INS).include? options[:max_insertions] + raise BackTrackError, "max_deletions: #{options[:max_deletions]} out of range (0 .. #{MAX_DEL})" unless (0 .. MAX_DEL).include? options[:max_deletions] matches = [] - while result = scan_C(self.seq, pattern, start, stop, max_mismatches, max_insertions, max_deletions) + while result = scan_C(self.seq, + pattern, + options[:start], + options[:stop], + options[:max_mismatches], + options[:max_insertions], + options[:max_deletions] + ) match = Match.new(result.first, result.last, self.seq[result.first ... result.first + result.last]) if block_given? @@ -116,7 +143,7 @@ module BackTrack matches << match end - start = result.first + 1 + options[:start] = result.first + 1 end return matches.empty? ? nil : matches unless block_given? diff --git a/code_ruby/test/maasha/seq/test_backtrack.rb b/code_ruby/test/maasha/seq/test_backtrack.rb index 6000dc0..79d5300 100644 --- a/code_ruby/test/maasha/seq/test_backtrack.rb +++ b/code_ruby/test/maasha/seq/test_backtrack.rb @@ -51,66 +51,66 @@ class BackTrackTest < Test::Unit::TestCase test "#patscan with bad start raises" do [-1, 20].each { |start| - assert_raise(BackTrackError) { @seq.patscan("N", start) } + assert_raise(BackTrackError) { @seq.patscan("N", start: start) } } end test "#patscan with OK start dont raise" do [0, 19].each { |start| - assert_nothing_raised { @seq.patscan("N", start) } + assert_nothing_raised { @seq.patscan("N", start: start) } } end test "#patscan with bad stop raises" do [-1, 20].each { |stop| - assert_raise(BackTrackError) { @seq.patscan("N", [0, stop]) } + assert_raise(BackTrackError) { @seq.patscan("N", stop: stop) } } end test "#patscan with OK stop dont raise" do [0, 19].each { |stop| - assert_nothing_raised { @seq.patscan("N", [0, stop]) } + assert_nothing_raised { @seq.patscan("N", stop: stop) } } end test "#patscan with stop returns correctly" do - assert_nil(@seq.patmatch("G", [0, 2])) - assert_equal("3:1:g", @seq.patmatch("G", [0, 3]).to_s) + assert_nil(@seq.patmatch("G", start: 0, stop: 2)) + assert_equal("3:1:g", @seq.patmatch("G", start: 0, stop: 3).to_s) end test "#patscan with bad mis raises" do [-1, 6].each { |mis| - assert_raise(BackTrackError) { @seq.patscan("N", 0, mis) } + assert_raise(BackTrackError) { @seq.patscan("N", max_mismatches: mis) } } end test "#patscan with OK mis dont raise" do [0, 5].each { |mis| - assert_nothing_raised { @seq.patscan("N", 0, mis) } + assert_nothing_raised { @seq.patscan("N", max_mismatches: mis) } } end test "#patscan with bad ins raises" do [-1, 6].each { |ins| - assert_raise(BackTrackError) { @seq.patscan("N", 0, 0, ins) } + assert_raise(BackTrackError) { @seq.patscan("N", max_insertions: ins) } } end test "#patscan with OK ins dont raise" do [0, 5].each { |ins| - assert_nothing_raised { @seq.patscan("N", 0, 0, ins) } + assert_nothing_raised { @seq.patscan("N", max_insertions: ins) } } end test "#patscan with bad del raises" do [-1, 6].each { |del| - assert_raise(BackTrackError) { @seq.patscan("N", 0, 0, 0, del) } + assert_raise(BackTrackError) { @seq.patscan("N", max_deletions: del) } } end test "#patscan with OK del dont raise" do [0, 5].each { |del| - assert_nothing_raised { @seq.patscan("N", 0, 0, 0, del) } + assert_nothing_raised { @seq.patscan("N", max_deletions: del) } } end @@ -127,36 +127,39 @@ class BackTrackTest < Test::Unit::TestCase end test "#patscan start is ok" do - assert_equal("10:1:g", @seq.patscan("N", 10).first.to_s) - assert_equal("19:1:g", @seq.patscan("N", 10).last.to_s) + assert_equal("10:1:g", @seq.patscan("N", start: 10).first.to_s) + assert_equal("19:1:g", @seq.patscan("N", start: 10).last.to_s) end test "#patscan mis left is ok" do - assert_equal("0:7:tacgatg", @seq.patscan("Aacgatg", 0, 1).first.to_s) + assert_equal("0:7:tacgatg", @seq.patscan("Aacgatg", max_mismatches: 1).first.to_s) end test "#patscan mis right is ok" do - assert_equal("13:7:tgcacgg", @seq.patscan("tgcacgA", 0, 1).first.to_s) + assert_equal("13:7:tgcacgg", @seq.patscan("tgcacgA", max_mismatches: 1).first.to_s) end test "#patscan ins left is ok" do - assert_equal("0:7:tacgatg", @seq.patscan("Atacgatg", 0, 0, 1).first.to_s) + assert_equal("0:7:tacgatg", @seq.patscan("Atacgatg", max_insertions: 1).first.to_s) end test "#patscan ins right is ok" do - assert_equal("13:7:tgcacgg", @seq.patscan("tgcacggA", 0, 0, 1).first.to_s) + assert_equal("13:7:tgcacgg", @seq.patscan("tgcacggA", max_insertions: 1).first.to_s) end test "#patscan del left is ok" do - assert_equal("0:7:tacgatg", @seq.patscan("acgatg", 0, 0, 0, 1).first.to_s) + assert_equal("0:7:tacgatg", @seq.patscan("acgatg", max_deletions: 1).first.to_s) end test "#patscan del right is ok" do - assert_equal("12:8:atgcacgg", @seq.patscan("tgcacgg", 0, 0, 0, 1).first.to_s) + assert_equal("12:8:atgcacgg", @seq.patscan("tgcacgg", max_deletions: 1).first.to_s) end test "#patscan ambiguity mis ins del all ok" do - assert_equal("0:20:tacgatgctagcatgcacgg", @seq.patscan("tacatgcNagGatgcCacgg", 0, 1, 1, 1).first.to_s) + assert_equal("0:20:tacgatgctagcatgcacgg", @seq.patscan("tacatgcNagGatgcCacgg", + max_mismatches: 1, + max_insertions: 1, + max_deletions: 1).first.to_s) end end -- 2.39.2