From 44868c672e33ad8c06529d79483f74bf006e8a68 Mon Sep 17 00:00:00 2001
From: martinahansen
Date: Wed, 20 Oct 2010 13:31:38 +0000
Subject: [PATCH] added patscan.rb

git-svn-id: http://biopieces.googlecode.com/svn/trunk@1143 74ccb610-7750-0410-82ae-013aeee3265d
---
 code_ruby/Maasha/lib/patscan.rb | 159 ++++++++++++++++++++++++++++++++
 code_ruby/Maasha/lib/seq.rb     |  12 ++-
 2 files changed, 170 insertions(+), 1 deletion(-)
 create mode 100644 code_ruby/Maasha/lib/patscan.rb

diff --git a/code_ruby/Maasha/lib/patscan.rb b/code_ruby/Maasha/lib/patscan.rb
new file mode 100644
index 0000000..2d64773
--- /dev/null
+++ b/code_ruby/Maasha/lib/patscan.rb
@@ -0,0 +1,159 @@
+require 'fasta'
+require 'open3'
+require 'stringio'
+require 'tempfile'
+
+# Error class for all exceptions to do with Patscan.
+class PatscanError < StandardError; end
+
+# Class for executing the commandline tool scan_for_matches, which is used like this:
+# scan_for_matches [options] pattern_file < fasta_data > output_data
+class Patscan
+  # Cache of prepared runs keyed on pattern and args, so the pattern file is
+  # saved and the options are formatted only once per distinct combination.
+  # Each entry keeps its Tempfile referenced - a Tempfile that is garbage
+  # collected is unlinked, which would leave a dangling pattern file path.
+  @cache = {}
+
+  class << self
+    attr_reader :cache
+  end
+
+  # Method to initialize a Patscan object. The options are checked and the
+  # pattern file is saved the first time a pattern/args combination is seen;
+  # later objects created with the same combination reuse the saved file.
+  def initialize(pattern, args = {})
+    key   = [pattern.to_s, args.to_a]
+    entry = (Patscan.cache[key] ||= prepare(pattern, args))
+
+    @command = ["scan_for_matches"] + entry[:options] + [entry[:file].path]
+  end
+
+  # Method to scan the sequence in a Seq object for the pattern specified
+  # during initialization. A Match object is yielded for each hit, and a
+  # PatscanError is raised if scan_for_matches exits with an error status.
+  def scan(seq)
+    fasta = seq.to_fasta.to_s
+    fasta += "\n" unless fasta.end_with?("\n")
+
+    # capture3 feeds stdin and drains stdout and stderr at the same time, so
+    # scan_for_matches cannot stall on a full pipe when there are many hits.
+    # The command is an argument list, so no shell is involved.
+    out, err, status = Open3.capture3(*@command, :stdin_data => fasta)
+
+    raise PatscanError, "scan_for_matches failed: #{err}" unless status.success?
+
+    # NOTE(review): assumes Fasta reads from any IO-like object (it was handed
+    # a pipe before) - confirm that a StringIO is accepted.
+    Fasta.new(StringIO.new(out)).each do |entry|
+      yield Match.new(entry)
+    end
+  end
+
+  private
+
+  # Method to check the arguments and save the pattern file, returning the
+  # options and the Tempfile as one cache entry.
+  def prepare(pattern, args)
+    { :options => check_args(args), :file => pattern_save(pattern) }
+  end
+
+  # Method to check arguments to patscan and return these as a list of
+  # command line options. Raises PatscanError on bad or unknown arguments.
+  def check_args(args)
+    options = []
+
+    args.each_pair do |key, val|
+      case key
+      when :complement
+        raise PatscanError, "Bad patscan complement value: #{val}" unless val == true || val == false
+        options << "-c" if val
+      when :protein
+        raise PatscanError, "Bad patscan protein value: #{val}" unless val == true || val == false
+        options << "-p" if val
+      when :max_hits
+        raise PatscanError, "Bad patscan max_hits value: #{val}" unless val.is_a?(Integer) && val > 0
+        options << "-m" << val.to_s
+      else
+        raise PatscanError, "Unknown argument: #{key}"
+      end
+    end
+
+    options
+  end
+
+  # Method to save the pattern to a temporary file. The Tempfile object is
+  # returned so that the caller can keep it referenced.
+  def pattern_save(pattern)
+    file = Tempfile.new("patscan.pat")
+    file << pattern
+    file.close # Flushes the pattern to disk; the file itself is kept.
+    file
+  end
+end
+
+# Error class for all exceptions to do with Match.
+class MatchError < StandardError; end
+
+# Class for matches from scan_for_matches.
+class Match
+  attr_accessor :pattern, :pat_beg, :pat_end, :strand
+
+  # Method to initialize a Match object from
+  # a scan_for_matches hit given as a Seq object.
+  def initialize(entry)
+    @pattern = pattern_get(entry)
+    @pat_beg, @pat_end = pattern_beg_end(entry)
+    @strand = pattern_strand
+  end
+
+  # Method to return the match as a hash record holding the pattern,
+  # its 0-based begin and end coordinates, its length and its strand.
+  def to_bp
+    record = {}
+    record[:PATTERN] = self.pattern
+    record[:PAT_BEG] = self.pat_beg
+    record[:PAT_END] = self.pat_end
+    record[:PAT_LEN] = self.pat_end - self.pat_beg + 1
+    record[:STRAND] = self.strand
+    record
+  end
+
+  private
+
+  # Method to extract the pattern.
+  def pattern_get(entry)
+    entry.seq
+  end
+
+  # Method to extract the begin and end coordinates of
+  # the match. Note that scan_for_matches output is 1-based.
+  def pattern_beg_end(entry)
+    if entry.seq_name =~ /\[(\d+),(\d+)\]$/
+      pat_beg = $1.to_i - 1
+      pat_end = $2.to_i - 1
+    else
+      raise MatchError, "Failed to get begin and end from: #{entry.seq_name}"
+    end
+
+    [pat_beg, pat_end]
+  end
+
+  # Method to determine the strand of the match - and reverse
+  # the pattern begin and end coordinates if the match is on the
+  # minus strand.
+  # A one-residue match has equal coordinates, so its strand cannot be
+  # told from them; it is reported as plus strand.
+  def pattern_strand
+    if @pat_beg <= @pat_end
+      strand = "+"
+    else
+      strand = "-"
+
+      @pat_beg, @pat_end = @pat_end, @pat_beg
+    end
+
+    strand
+  end
+end
+
diff --git a/code_ruby/Maasha/lib/seq.rb b/code_ruby/Maasha/lib/seq.rb
index a8decdb..a797100 100644
--- a/code_ruby/Maasha/lib/seq.rb
+++ b/code_ruby/Maasha/lib/seq.rb
@@ -1,3 +1,5 @@
+require 'patscan'
+
 # Residue alphabets
 DNA = %w[a t c g]
 RNA = %w[a u c g]
@@ -192,6 +194,15 @@ class Seq
     end
   end
 
+  # Method that invokes patscan (a.k.a scan_for_matches) on
+  # a sequence object.
+  def patscan(pattern, args = {})
+    ps = Patscan.new(pattern, args)
+    ps.scan(self) do |match|
+      yield match
+    end
+  end
+
   private
 
   # Method to convert a Solexa score (odd ratio) to
@@ -292,7 +303,6 @@ class Digest
   end
 end
 
-
 __END__
 
 
-- 
2.39.5