+++ /dev/null
# Debug switch; nothing in the code visible here reads it.
DEBUG = false

# Mixin adding approximate pattern search to any object that exposes the
# text to search through a +seq+ method (indexable with [] and responding
# to +length+).
module PatFind
  # Finds every hit of +pattern+ in +seq+, scanning left to right and
  # resuming one position after the start of the previous hit (so hits may
  # overlap).
  #
  # pattern - the pattern to look for.
  # max_mis - maximum number of mismatches allowed per hit.
  # max_ins - maximum number of insertions allowed per hit.
  # max_del - maximum number of deletions allowed per hit.
  #
  # With a block, yields each Match and returns nil; without a block,
  # returns an Array of Match objects (empty when nothing matches).
  def scan(pattern, max_mis = 0, max_ins = 0, max_del = 0)
    matches = []
    pos = 0 # per-call cursor: every scan starts from the beginning of seq

    while (m = match(pattern, pos, max_mis, max_ins, max_del))
      # Match#pos is an absolute offset, so the cursor is set, not added to.
      pos = m.pos + 1

      if block_given?
        yield m
      else
        matches << m
      end
    end

    block_given? ? nil : matches
  end

  # Finds the leftmost hit of +pattern+ starting at or after +pos+.
  #
  # Returns nil when there is no hit (or when +pattern+ is empty). Otherwise
  # builds a Match holding the start offset and the number of mismatches,
  # insertions and deletions used by the alignment that was found; with a
  # block the Match is yielded, without a block it is returned.
  def match(pattern, pos = 0, max_mis = 0, max_ins = 0, max_del = 0)
    hit = patfind_first_hit(pattern, pos, max_mis, max_ins, max_del)
    return nil unless hit

    block_given? ? (yield hit) : hit
  end

  private

  # Tries each start offset from +pos+ onwards and returns a Match for the
  # first one where the pattern can be aligned within the error budget.
  def patfind_first_hit(pattern, pos, max_mis, max_ins, max_del)
    return nil if pattern.empty?

    target = seq

    (pos...target.length).each do |start|
      left = patfind_align(pattern, target, 0, start, max_mis, max_ins, max_del)
      next unless left

      # +left+ is the unused budget; the difference is what the hit consumed.
      return Match.new(pattern, start, max_mis - left[0], max_ins - left[1], max_del - left[2])
    end

    nil
  end

  # Recursively aligns pattern[pi..-1] against target[si..-1].
  #
  # Returns the remaining [mismatch, insertion, deletion] budget of the first
  # alignment found, or nil if none fits. Exact character matches are tried
  # first, then a mismatch, then an insertion (extra character in the
  # target), then a deletion (pattern character absent from the target).
  # The alignment found is not guaranteed to use the fewest possible edits.
  def patfind_align(pattern, target, pi, si, mis, ins, del)
    return [mis, ins, del] if pi == pattern.length

    if si < target.length
      if pattern[pi] == target[si]
        found = patfind_align(pattern, target, pi + 1, si + 1, mis, ins, del)
        return found if found
      elsif mis > 0
        found = patfind_align(pattern, target, pi + 1, si + 1, mis - 1, ins, del)
        return found if found
      end

      # A leading insertion would merely shift the hit's start, so skip it.
      if ins > 0 && pi > 0
        found = patfind_align(pattern, target, pi, si + 1, mis, ins - 1, del)
        return found if found
      end
    end

    return patfind_align(pattern, target, pi + 1, si, mis, ins, del - 1) if del > 0

    nil
  end
end
-
# Record of a single PatFind hit: the pattern, the position recorded for the
# hit, and the mismatch/insertion/deletion counts supplied by the caller.
class Match
  attr_accessor :pattern, :pos, :length, :mismatches, :insertions, :deletions

  # Stores the given values as-is; +length+ is taken from the pattern.
  def initialize(pattern, pos, mismatches, insertions, deletions)
    @pattern, @pos = pattern, pos
    @length = pattern.length
    @mismatches, @insertions, @deletions = mismatches, insertions, deletions
  end
end
-
+++ /dev/null
-require 'fasta'
-require 'open3'
-require 'tempfile'
-
# Raised for every failure that originates in Patscan.
class PatscanError < StandardError
end
-
# Class for executing the command-line tool scan_for_matches, which is used like this:
#   scan_for_matches [options] pattern_file < fasta_data > output_data
#
# The argument list and the pattern file are class-level state built by the
# first successful Patscan.new only; later instances reuse them and their own
# +pattern+ and +args+ are ignored.
class Patscan
  @@initialized  = false
  @@options      = []
  # Keeps the Tempfile object alive: a Tempfile is unlinked once it is
  # garbage collected, which would invalidate the path stored in @@options.
  @@pattern_file = nil

  # Method to initialize a Patscan object and where we save the pattern file
  # and format the options only once.
  #
  # pattern - pattern text written to the pattern file.
  # args    - Hash of options: :complement (true/false), :protein
  #           (true/false), :max_hits (positive Integer).
  #
  # Raises PatscanError on an unknown key or an invalid value; in that case
  # no class-level state is modified, so a corrected call can be retried.
  def initialize(pattern, args = {})
    return if @@initialized

    options = check_args(args)
    options << pattern_save(pattern)

    @@options     = options
    @@initialized = true
  end

  # Method to scan the sequence in a Seq object for a pattern specified
  # during initialization.
  #
  # seq - object responding to +to_fasta+.
  #
  # Yields one Match per entry read from scan_for_matches' output, after the
  # tool has exited successfully. Raises PatscanError (with the tool's stderr
  # text) when it exits with a non-zero status.
  def scan(seq)
    fasta_text = seq.to_fasta
    entries    = []

    # Argument-vector form: no shell is involved. Block form: all pipes are
    # closed for us, also when an exception is raised part-way through.
    Open3.popen3("scan_for_matches", *@@options) do |stdin, stdout, stderr, wait_thr|
      # stderr is drained and stdin is fed in the background while stdout is
      # read here, so neither side can stall on a full pipe buffer.
      err_reader = Thread.new { stderr.read }
      writer = Thread.new do
        begin
          stdin.puts fasta_text
        rescue Errno::EPIPE
          # The tool exited before consuming its input; the exit status
          # check below reports the failure.
        ensure
          stdin.close
        end
      end

      Fasta.new(stdout).each { |entry| entries << entry }

      writer.join
      exit_status = wait_thr.value

      raise PatscanError, "scan_for_matches failed: #{err_reader.value}" unless exit_status.success?
    end

    entries.each { |entry| yield Match.new(entry) }
  end

  private

  # Method to check arguments to patscan. Returns the corresponding
  # command-line arguments as an Array of Strings.
  def check_args(args)
    options = []

    args.each_pair do |key, val|
      case key
      when :complement
        raise PatscanError, "Bad patscan complement value: #{val}" unless [true, false].include?(val)
        options << "-c" if val
      when :protein
        raise PatscanError, "Bad patscan protein value: #{val}" unless [true, false].include?(val)
        options << "-p" if val
      when :max_hits
        # Integer rather than Fixnum: Fixnum no longer exists in Ruby >= 3.2.
        raise PatscanError, "Bad patscan max_hits value: #{val}" unless val.is_a?(Integer) && val > 0
        options << "-m" << val.to_s
      else
        raise PatscanError, "Unknown argument: #{key}"
      end
    end

    options
  end

  # Method to save the pattern to a temporary file. Returns the file's path.
  def pattern_save(pattern)
    file = Tempfile.new("patscan.pat")
    file << pattern
    file.close # flushes to disk; close without unlink keeps the file

    @@pattern_file = file

    file.path
  end
end
-
# Raised for every failure that originates in Match.
class MatchError < StandardError
end
-
# Class for matches from scan_for_matches.
class Match
  attr_accessor :pattern, :pat_beg, :pat_end, :strand

  # Method to initialize a Match object from a scan_for_matches hit.
  #
  # entry - object responding to +seq+ (the matched text) and +seq_name+
  #         (a name ending in "[begin,end]").
  #
  # Raises MatchError when the coordinates cannot be read from the name.
  def initialize(entry)
    @pattern = pattern_get(entry)
    @pat_beg, @pat_end = pattern_beg_end(entry)
    @strand = pattern_strand
  end

  # Returns the match as a Hash with the keys :PATTERN, :PAT_BEG, :PAT_END,
  # :PAT_LEN (inclusive length) and :STRAND.
  def to_bp
    {
      :PATTERN => pattern,
      :PAT_BEG => pat_beg,
      :PAT_END => pat_end,
      :PAT_LEN => pat_end - pat_beg + 1,
      :STRAND  => strand
    }
  end

  private

  # Method to extract the pattern.
  def pattern_get(entry)
    entry.seq
  end

  # Method to extract the begin and end coordinates of
  # the match. Note that scan_for_matches output is 1-based, so both values
  # are shifted down by one.
  def pattern_beg_end(entry)
    coords = /\[(\d+),(\d+)\]$/.match(entry.seq_name)

    raise MatchError, "Failed to get begin and end from: #{entry.seq_name}" unless coords

    [coords[1].to_i - 1, coords[2].to_i - 1]
  end

  # Method to determine the strand of the match - and reverse
  # the pattern begin and end coordinates if the match is on the
  # minus strand.
  #
  # A hit whose begin and end coincide carries no orientation in its
  # coordinates; it is reported as "+" rather than being flagged as a
  # minus-strand hit.
  def pattern_strand
    return "+" if @pat_beg <= @pat_end

    @pat_beg, @pat_end = @pat_end, @pat_beg

    "-"
  end
end
-