From b676baacf06b64c51fcb3cb5041e8fa3bbd3e5cb Mon Sep 17 00:00:00 2001 From: martinahansen Date: Mon, 17 Jun 2013 09:49:58 +0000 Subject: [PATCH] updated patscan_seq git-svn-id: http://biopieces.googlecode.com/svn/trunk@2177 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/patscan_seq | 109 +++++++++++++++++++++++++++------------------ 1 file changed, 66 insertions(+), 43 deletions(-) diff --git a/bp_bin/patscan_seq b/bp_bin/patscan_seq index 089954d..64fc6dd 100755 --- a/bp_bin/patscan_seq +++ b/bp_bin/patscan_seq @@ -36,30 +36,32 @@ require 'maasha/seq' # Class with methods to execute Patscan (a.k.a. scan_for_matches). class Patscan # Method to initialize a Patscan object. - def initialize(pat_file, in_file, out_file, pattern) - @pat_file = pat_file + def initialize(tmp_dir, in_file, patterns) + @tmp_dir = tmp_dir @in_file = in_file - @out_file = out_file - @pattern = pattern + @patterns = patterns - pattern_save + patterns_save end # Method to run Patscan - def run(comp = nil, type = nil, max_hit = nil, max_mis = nil, verbose = nil) - args = [] - args << "scan_for_matches" - args << "-c" if comp - args << "-p" if type == :protein - args << "-n #{max_mis}" if max_mis - args << "-m #{max_hit}" if max_hit - args << @pat_file - args << "< #{@in_file}" - args << "> #{@out_file}" - command = args.join(" ") - $stderr.puts command if verbose - system(command) - raise "Command failed: #{command}" unless $?.success? + def run(options) + @patterns.each_with_index do |pattern, i| + args = [] + args << "scan_for_matches" + args << "-c" if options[:comp] + args << "-p" if options[:seq_type] == :protein + args << "-o" if options[:overlap] + args << "-n #{options[:max_misses]}" if options[:max_misses] + args << "-m #{options[:max_hits]}" if options[:max_hits] + args << File.join(@tmp_dir, "#{i}.pat") + args << "< #{@in_file}" + args << "> " + File.join(@tmp_dir, "#{i}.out") + command = args.join(" ") + $stderr.puts command if options[:verbose] + system(command) + raise "Command failed: #{command}" unless $?.success? + end end # Method to parse Patscan results and return @@ -68,23 +70,25 @@ class Patscan def results_parse results = Hash.new { |h, k| h[k] = [] } - Fasta.open(@out_file, 'r') do |ios| - ios.each do |entry| - if entry.seq_name =~ /([^:]+):\[(\d+),(\d+)\]/ - id = $1.to_i - start = $2.to_i - 1 - stop = $3.to_i - 1 - - if stop > start - strand = '+' + @patterns.each_with_index do |pattern, i| + Fasta.open(File.join(@tmp_dir, "#{i}.out"), 'r') do |ios| + ios.each do |entry| + if entry.seq_name =~ /([^:]+):\[(\d+),(\d+)\]/ + id = $1.to_i + start = $2.to_i - 1 + stop = $3.to_i - 1 + + if stop > start + strand = '+' + else + start, stop = stop, start + strand = '-' + end + + results[id.to_i] << Match.new(start, stop, strand, entry.seq) else - start, stop = stop, start - strand = '-' + raise "Failed to parse seq_name: #{entry.seq_name} in patscan result" end - - results[id.to_i] << Match.new(start, stop, strand, entry.seq) - else - raise "Failed to parse seq_name: #{entry.seq_name} in patscan result" end end end @@ -95,9 +99,11 @@ class Patscan private # Method to save a patscan pattern to a file. - def pattern_save - File.open(@pat_file, "w") do |ios| - ios.puts @pattern + def patterns_save + @patterns.each_with_index do |pattern, i| + File.open(File.join(@tmp_dir, "#{i}.pat"), 'w') do |ios| + ios.puts pattern + end end end @@ -119,20 +125,35 @@ class Patscan end casts = [] -casts << {:long=>'pattern', :short=>'p', :type=>'string', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'pattern', :short=>'p', :type=>'string', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'pattern_in', :short=>'P', :type=>'file!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>'inline', :short=>'i', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'overlap', :short=>'o', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>'comp', :short=>'c', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>'max_hits', :short=>'h', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>'0'} casts << {:long=>'max_misses', :short=>'m', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>'0'} options = Biopieces.options_parse(ARGV, casts) +unless options[:pattern] or options[:pattern_in] + raise ArgumentError, "--pattern or --pattern_in must be specified" +end + +patterns = [] + +if options[:pattern_in] + File.open(options[:pattern_in], 'r') do |ios| + ios.each_line { |l| patterns << l.chomp } + end +else + patterns << options[:pattern] +end + +raise ArgumentError, "no patterns found" if patterns.empty? + tmp_dir = Biopieces.mktmpdir tmp_file = File.join(tmp_dir, "tmp.stream") -pat_file = File.join(tmp_dir, "pat.txt") in_file = File.join(tmp_dir, "in.fna") -out_file = File.join(tmp_dir, "out.fna") -out_file = File.join(tmp_dir, "out.fna") seq_name_count = 0 seq_name_hash = {} @@ -160,8 +181,10 @@ Biopieces.open(options[:stream_in], tmp_file) do |input, output| end end -patscan = Patscan.new(pat_file, in_file, out_file, options[:pattern]) -patscan.run(options[:comp], seq_type, options[:max_hits], options[:max_misses], options[:verbose]) +options[:seq_type] = seq_type + +patscan = Patscan.new(tmp_dir, in_file, patterns) +patscan.run(options) results = patscan.results_parse Biopieces.open(tmp_file, options[:stream_out]) do |input, output| -- 2.39.2