X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Fpatscan_seq;h=28f028dec433ccb5ac8ccd32c6d4ba59b9d08a0d;hb=5de6112b70b59420b245ce636a8b2e3c90acbe00;hp=ef217c559298293d0fdec522e77b9d46c1af535e;hpb=1d849d14e996265619752ead2a5e35356b54cca1;p=biopieces.git diff --git a/bp_bin/patscan_seq b/bp_bin/patscan_seq index ef217c5..28f028d 100755 --- a/bp_bin/patscan_seq +++ b/bp_bin/patscan_seq @@ -36,30 +36,32 @@ require 'maasha/seq' # Class with methods to execute Patscan (a.k.a. scan_for_matches). class Patscan # Method to initialize a Patscan object. - def initialize(pat_file, in_file, out_file, pattern) - @pat_file = pat_file + def initialize(tmp_dir, in_file, patterns) + @tmp_dir = tmp_dir @in_file = in_file - @out_file = out_file - @pattern = pattern + @patterns = patterns - pattern_save + patterns_save end # Method to run Patscan - def run(comp = nil, type = nil, max_mis = nil, max_hit = nil, verbose = nil) - args = [] - args << "scan_for_matches" - args << "-c" if comp - args << "-p" if type == 'protein' - args << "-n #{max_mis}" if max_mis - args << "-m #{max_hit}" if max_hit - args << @pat_file - args << "< #{@in_file}" - args << "> #{@out_file}" - command = args.join(" ") - $stderr.puts command if verbose - system(command) - raise "Command failed: #{command}" unless $?.success? + def run(options) + @patterns.each_with_index do |pattern, i| + args = [] + args << "scan_for_matches" + args << "-c" if options[:comp] + args << "-p" if options[:seq_type] == :protein + args << "-o 1" if options[:overlap] + args << "-n #{options[:max_misses]}" if options[:max_misses] + args << "-m #{options[:max_hits]}" if options[:max_hits] + args << File.join(@tmp_dir, "#{i}.pat") + args << "< #{@in_file}" + args << "> " + File.join(@tmp_dir, "#{i}.out") + command = args.join(" ") + $stderr.puts command if options[:verbose] + system(command) + raise "Command failed: #{command}" unless $?.success? + end end # Method to parse Patscan results and return @@ -68,23 +70,25 @@ class Patscan def results_parse results = Hash.new { |h, k| h[k] = [] } - Fasta.open(@out_file, mode='r') do |ios| - ios.each do |entry| - if entry.seq_name =~ /([^:]+):\[(\d+),(\d+)\]/ - id = $1.to_i - start = $2.to_i - 1 - stop = $3.to_i - 1 - - if stop > start - strand = '+' + @patterns.each_with_index do |pattern, i| + Fasta.open(File.join(@tmp_dir, "#{i}.out"), 'r') do |ios| + ios.each do |entry| + if entry.seq_name =~ /([^:]+):\[(\d+),(\d+)\]/ + id = $1.to_i + start = $2.to_i - 1 + stop = $3.to_i - 1 + + if stop > start + strand = '+' + else + start, stop = stop, start + strand = '-' + end + + results[id.to_i] << Match.new(start, stop, strand, pattern, entry.seq) else - start, stop = stop, start - strand = '-' + raise "Failed to parse seq_name: #{entry.seq_name} in patscan result" end - - results[id.to_i] << Match.new(start, stop, strand, entry.seq) - else - raise "Failed to parse seq_name: #{entry.seq_name} in patscan result" end end end @@ -95,20 +99,23 @@ class Patscan private # Method to save a patscan pattern to a file. - def pattern_save - File.open(@pat_file, "w") do |ios| - ios.puts @pattern + def patterns_save + @patterns.each_with_index do |pattern, i| + File.open(File.join(@tmp_dir, "#{i}.pat"), 'w') do |ios| + ios.puts pattern + end end end # Subclass to define Patscan hits. class Match - attr_reader :start, :stop, :strand, :match + attr_reader :start, :stop, :strand, :pattern, :match - def initialize(start, stop, strand, match) + def initialize(start, stop, strand, pattern, match) @start = start @stop = stop @strand = strand + @pattern = pattern @match = match end @@ -119,59 +126,78 @@ class Patscan end casts = [] -casts << {:long=>'pattern', :short=>'p', :type=>'string', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'pattern', :short=>'p', :type=>'string', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'pattern_in', :short=>'P', :type=>'file!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>'inline', :short=>'i', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'overlap', :short=>'o', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>'comp', :short=>'c', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>'max_hits', :short=>'h', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>'0'} casts << {:long=>'max_misses', :short=>'m', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>'0'} options = Biopieces.options_parse(ARGV, casts) +unless options[:pattern] or options[:pattern_in] + raise ArgumentError, "--pattern or --pattern_in must be specified" +end + +patterns = [] + +if options[:pattern_in] + File.open(options[:pattern_in], 'r') do |ios| + ios.each_line { |l| patterns << l.chomp } + end +else + patterns << options[:pattern] +end + +raise ArgumentError, "no patterns found" if patterns.empty? + tmp_dir = Biopieces.mktmpdir tmp_file = File.join(tmp_dir, "tmp.stream") -pat_file = File.join(tmp_dir, "pat.txt") in_file = File.join(tmp_dir, "in.fna") -out_file = File.join(tmp_dir, "out.fna") -out_file = File.join(tmp_dir, "out.fna") seq_name_count = 0 seq_name_hash = {} seq_type = nil Biopieces.open(options[:stream_in], tmp_file) do |input, output| - Fasta.open(in_file, mode="w") do |fasta_io| + Fasta.open(in_file, "w") do |fasta_io| input.each_record do |record| - if record.has_key? :SEQ_NAME + if record[:SEQ_NAME] seq_name_hash[seq_name_count] = record[:SEQ_NAME] record[:SEQ_NAME] = seq_name_count seq_name_count += 1 + seq = Seq.new_bp(record) + if seq_type.nil? - seq = Seq.new("", record[:SEQ]) seq_type = seq.type_guess end + + fasta_io.puts seq.to_fasta end output.puts record - fasta_io.puts record end end end -patscan = Patscan.new(pat_file, in_file, out_file, options[:pattern]) -patscan.run(options[:comp], seq_type, options[:max_hits], options[:max_misses], options[:verbose]) +options[:seq_type] = seq_type + +patscan = Patscan.new(tmp_dir, in_file, patterns) +patscan.run(options) results = patscan.results_parse Biopieces.open(tmp_file, options[:stream_out]) do |input, output| input.each_record do |record| - if record.has_key? :SEQ_NAME + if record[:SEQ_NAME] key = record[:SEQ_NAME].to_i record[:SEQ_NAME] = seq_name_hash[key] if options[:inline] - if results.has_key? key + if results[key] results[key].each do |result| - record[:PATTERN] = options[:pattern] + record[:PATTERN] = result.pattern record[:MATCH] = result.match record[:S_BEG] = result.start record[:S_END] = result.stop @@ -180,6 +206,8 @@ Biopieces.open(tmp_file, options[:stream_out]) do |input, output| output.puts record end + else + output.puts record end else output.puts record @@ -189,7 +217,7 @@ Biopieces.open(tmp_file, options[:stream_out]) do |input, output| results[key].each do |result| new_record[:REC_TYPE] = "PATSCAN" new_record[:S_ID] = record[:SEQ_NAME] - new_record[:Q_ID] = options[:pattern] + new_record[:Q_ID] = result.pattern new_record[:MATCH] = result.match new_record[:S_BEG] = result.start new_record[:S_END] = result.stop