git.donarmstrong.com Git - biopieces.git/commitdiff
added patscan.rb
author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 20 Oct 2010 13:31:38 +0000 (13:31 +0000)
committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 20 Oct 2010 13:31:38 +0000 (13:31 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1143 74ccb610-7750-0410-82ae-013aeee3265d

code_ruby/Maasha/lib/patscan.rb [new file with mode: 0644]
code_ruby/Maasha/lib/seq.rb

diff --git a/code_ruby/Maasha/lib/patscan.rb b/code_ruby/Maasha/lib/patscan.rb
new file mode 100644 (file)
index 0000000..2d64773
--- /dev/null
@@ -0,0 +1,142 @@
+require 'fasta'
+require 'open3'
+require 'stringio'
+require 'tempfile'
+
+# Error class for all exceptions to do with Patscan.
+class PatscanError < StandardError; end
+
+# Class for executing the commandline tool scan_for_matches, which is used like this:
+# scan_for_matches [options] pattern_file < fasta_data > output_data
+class Patscan
+  @@initialized = false
+  @@options     = []
+
+  # Method to initialize a Patscan object and where we save the pattern file
+  # and format the options only once.
+  def initialize(pattern, args = {})
+    unless @@initialized
+      check_args(args)
+      pattern_save(pattern)
+      @@initialized = true
+    end
+  end
+
+  # Method to scan the sequence in a Seq object for a pattern specified
+  # during initialization.
+  def scan(seq)
+    begin
+      stdin, stdout, stderr, wait_thr = Open3.popen3("scan_for_matches #{@@options.join(" ")}" )
+
+      stdin.puts seq.to_fasta
+
+      stdin.close
+
+      exit_status = wait_thr.value
+
+      raise PatscanError, "scan_for_matches failed: #{stderr.read}" unless exit_status.success?
+
+      fasta = Fasta.new(stdout)
+
+      fasta.each do |entry|
+        yield Match.new(entry)
+      end
+    ensure
+      stdin.close unless stdin.closed?
+      stdout.close
+      stderr.close
+    end
+  end
+
+  private
+
+  # Method to check arguments to patscan and add these to @@options.
+  def check_args(args)
+    args.each_pair do |key,val|
+      case key
+      when "complement".to_sym
+        raise PatscanError, "Bad patscan complement value: #{val}" unless val == true or val == false
+        @@options << "-c" if val == true
+      when "protein".to_sym
+        raise PatscanError, "Bad patscan protein value: #{val}" unless val == true or val == false
+        @@options << "-p" if val == true
+      when "max_hits".to_sym
+        raise PatscanError, "Bad patscan protein value: #{val}" unless val.is_a? Fixnum and val > 0
+        @@options << "-m #{val}"
+      else
+        raise PatscanError, "Unknown argument: #{key}"
+      end
+    end
+  end
+
+  # Method to save pattern file and add
+  # the path to @@options.
+  def pattern_save(pattern)
+    file = Tempfile.new("patscan.pat")
+    ios  = File.open(file, mode="w")
+    ios << pattern
+    ios.close
+
+    @@options << file.path
+  end
+end
+
+# Error class for all exceptions to do with Match.
+class MatchError < StandardError; end
+
+# Class for matches from scan_for_matches.
+class Match
+  attr_accessor :pattern, :pat_beg, :pat_end, :strand
+  # Method to initialize a Match object from
+  # a scan_for_matches hit given as a Seq object.
+  def initialize(entry)
+    @pattern           = pattern_get(entry)
+    @pat_beg, @pat_end = pattern_beg_end(entry)
+    @strand            = pattern_strand
+  end
+
+  def to_bp
+    record = {}
+    record[:PATTERN]  = self.pattern
+    record[:PAT_BEG]  = self.pat_beg
+    record[:PAT_END]  = self.pat_end
+    record[:PAT_LEN]  = self.pat_end - self.pat_beg + 1
+    record[:STRAND]   = self.strand
+    record
+  end
+
+  private
+
+  # Method to extract the pattern.
+  def pattern_get(entry)
+    entry.seq
+  end
+
+  # Method to extract the begin and end coordinates of
+  # the match. Note that scan_for_matches output is 1-based.
+  def pattern_beg_end(entry)
+    if entry.seq_name =~ /\[(\d+),(\d+)\]$/
+      pat_beg = $1.to_i - 1
+      pat_end = $2.to_i - 1
+    else
+      raise MatchError, "Failed to get begin and end from: #{entry.seq_name}"
+    end
+
+    [pat_beg, pat_end]
+  end
+
+  # Method to determine the strand of the match - and reverse
+  # the pattern begin and end coordinates if the match is on the
+  # minus strand.
+  def pattern_strand
+    if @pat_beg < @pat_end
+      strand = "+"
+    else
+      strand = "-"
+
+      @pat_beg, @pat_end = @pat_end, @pat_beg
+    end
+
+    strand
+  end
+end
+
index a8decdb09d00643a3e38c85d6a22ec1c182cc8ec..a7971000ecd11a7d63640e1d21475e8130752141 100644 (file)
@@ -1,3 +1,5 @@
+require 'patscan'
+
 # Residue alphabets
 DNA     = %w[a t c g]
 RNA     = %w[a u c g]
@@ -192,6 +194,15 @@ class Seq
     end
   end
 
+  # Method that invokes patscan (a.k.a. scan_for_matches) on
+  # a sequence object.
+  def patscan(pattern, args = {})
+    ps = Patscan.new(pattern, args)
+    ps.scan(self) do |match|
+      yield match
+    end
+  end
+
   private
 
   # Method to convert a Solexa score (odd ratio) to
@@ -292,7 +303,6 @@ class Digest
   end
 end
 
-
 __END__