--- /dev/null
require 'fasta'
require 'open3'
require 'stringio'
require 'tempfile'
+
# Exception type used for every error that Patscan itself reports.
class PatscanError < StandardError
end
+
# Class for executing the commandline tool scan_for_matches, which is used like this:
#
#   scan_for_matches [options] pattern_file < fasta_data > output_data
#
# The pattern file is written and the options are validated once per distinct
# pattern/argument combination; Patscan objects created later with the same
# pattern and arguments reuse that prepared invocation.
class Patscan
  # Name of the external executable, resolved through PATH.
  COMMAND = "scan_for_matches"

  # Prepared invocations keyed by [pattern, args]. Each entry keeps the
  # Tempfile object next to the option list: a Tempfile is deleted when the
  # object is garbage collected, so dropping the reference could remove the
  # pattern file while its path is still being passed to the tool.
  @@prepared = {}

  # Method to initialize a Patscan object.
  #
  # pattern - the pattern, written verbatim to a temporary pattern file.
  # args    - Hash of options:
  #             :complement => true/false   (true adds -c)
  #             :protein    => true/false   (true adds -p)
  #             :max_hits   => Integer > 0  (adds -m <value>)
  #
  # Raises PatscanError on an unknown option or an invalid option value.
  def initialize(pattern, args = {})
    # Copies are used in the key so later mutation by the caller cannot
    # corrupt the cache lookup.
    key = [pattern.to_s.dup, args.dup]

    @@prepared[key] ||= begin
      options = check_args(args) # validate first: bad arguments create no file
      file    = pattern_save(pattern)

      { options: (options << file.path).freeze, file: file }
    end

    @options = @@prepared[key][:options]
  end

  # Method to scan the sequence in a Seq object for the pattern specified
  # during initialization. Yields a Match for every entry in the tool's output.
  #
  # seq - object responding to #to_fasta.
  #
  # Returns the result of Fasta#each, or an Enumerator when no block is given.
  # Raises PatscanError if scan_for_matches cannot be started or exits with a
  # non-successful status.
  def scan(seq)
    return to_enum(:scan, seq) unless block_given?

    # Same bytes IO#puts would have written: newline-terminated input.
    input = seq.to_fasta.to_s
    input += "\n" unless input.end_with?("\n")

    begin
      # capture3 feeds stdin and drains stdout/stderr concurrently, so neither
      # a large input nor a large output can block on a full pipe. The array
      # form bypasses the shell, so the pattern file path needs no quoting.
      stdout, stderr, status = Open3.capture3(COMMAND, *@options, stdin_data: input)
    rescue SystemCallError => e
      raise PatscanError, "scan_for_matches could not be run: #{e.message}"
    end

    raise PatscanError, "scan_for_matches failed: #{stderr}" unless status.success?

    # NOTE(review): assumes Fasta only needs a readable IO-like object - confirm.
    Fasta.new(StringIO.new(stdout)).each do |entry|
      yield Match.new(entry)
    end
  end

  private

  # Method to check arguments to patscan.
  #
  # Returns an Array with the command line options derived from args, in the
  # order the arguments were given.
  # Raises PatscanError on an unknown key or an invalid value.
  def check_args(args)
    args.each_with_object([]) do |(key, val), options|
      case key
      when :complement
        raise PatscanError, "Bad patscan complement value: #{val}" unless [true, false].include?(val)
        options << "-c" if val
      when :protein
        raise PatscanError, "Bad patscan protein value: #{val}" unless [true, false].include?(val)
        options << "-p" if val
      when :max_hits
        raise PatscanError, "Bad patscan max_hits value: #{val}" unless val.is_a?(Integer) && val > 0
        # Flag and value are separate argv entries because no shell splits them.
        options << "-m" << val.to_s
      else
        raise PatscanError, "Unknown argument: #{key}"
      end
    end
  end

  # Method to save the pattern to a temporary file.
  #
  # Returns the Tempfile; the caller must keep a reference to it for as long
  # as the path is in use.
  def pattern_save(pattern)
    file = Tempfile.new("patscan.pat")
    file << pattern
    file.close # flushes and releases the descriptor; the file stays on disk
    file
  end
end
+
# Exception type used for every error that Match itself reports.
class MatchError < StandardError
end
+
# Class for matches from scan_for_matches.
#
# Coordinates are stored 0-based with pat_beg <= pat_end; the orientation of
# the hit is kept separately in strand ("+" or "-").
class Match
  attr_accessor :pattern, :pat_beg, :pat_end, :strand

  # Captures the two coordinates of a "[beg,end]" suffix of a hit name.
  COORDINATES = /\[(\d+),(\d+)\]$/

  # Method to initialize a Match object from
  # a scan_for_matches hit given as a Seq object.
  #
  # entry - object responding to #seq and #seq_name.
  #
  # Raises MatchError if the name does not end in "[beg,end]".
  def initialize(entry)
    @pattern = pattern_get(entry)
    @pat_beg, @pat_end = pattern_beg_end(entry)
    @strand = pattern_strand
  end

  # Method to convert the match to a record Hash with the keys
  # :PATTERN, :PAT_BEG, :PAT_END, :PAT_LEN and :STRAND.
  def to_bp
    {
      PATTERN: pattern,
      PAT_BEG: pat_beg,
      PAT_END: pat_end,
      PAT_LEN: pat_end - pat_beg + 1, # both ends are inclusive
      STRAND: strand
    }
  end

  private

  # Method to extract the pattern.
  def pattern_get(entry)
    entry.seq
  end

  # Method to extract the begin and end coordinates of
  # the match. Note that scan_for_matches output is 1-based,
  # so both values are shifted to 0-based here.
  #
  # Returns [pat_beg, pat_end] in the order given in the name.
  def pattern_beg_end(entry)
    hit = COORDINATES.match(entry.seq_name.to_s)

    raise MatchError, "Failed to get begin and end from: #{entry.seq_name}" unless hit

    [hit[1].to_i - 1, hit[2].to_i - 1]
  end

  # Method to determine the strand of the match - and reverse
  # the pattern begin and end coordinates if the match is on the
  # minus strand.
  #
  # A single-position hit (begin == end) carries no orientation and is
  # reported as "+"; without complement scanning it can only be a forward hit.
  def pattern_strand
    return "+" if @pat_beg <= @pat_end

    @pat_beg, @pat_end = @pat_end, @pat_beg
    "-"
  end
end
+