-#!/usr/bin/env perl
+#!/usr/bin/env ruby
-use warnings;
-use strict;
+# Copyright (C) 2007-2011 Martin A. Hansen.
-use Maasha::BioRun;
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This program is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Run Patscan on sequences in the stream.
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+require 'maasha/biopieces'
+require 'maasha/fasta'
+require 'maasha/seq'
+
+# Class with methods to execute Patscan (a.k.a. scan_for_matches).
+class Patscan
+ # Method to initialize a Patscan object.
+ def initialize(pat_file, in_file, out_file, pattern)
+ @pat_file = pat_file
+ @in_file = in_file
+ @out_file = out_file
+ @pattern = pattern
+
+ pattern_save
+ end
+
+ # Method to run Patscan
+ def run(comp = nil, type = nil, max_hit = nil, max_mis = nil, verbose = nil)
+ args = []
+ args << "scan_for_matches"
+ args << "-c" if comp
+ args << "-p" if type == :protein
+ args << "-n #{max_mis}" if max_mis
+ args << "-m #{max_hit}" if max_hit
+ args << @pat_file
+ args << "< #{@in_file}"
+ args << "> #{@out_file}"
+ command = args.join(" ")
+ $stderr.puts command if verbose
+ system(command)
+ raise "Command failed: #{command}" unless $?.success?
+ end
+
+ # Method to parse Patscan results and return
+ # these in a hash with ID as key and a list
+ # of hits as value.
+ def results_parse
+ results = Hash.new { |h, k| h[k] = [] }
+
+ Fasta.open(@out_file, 'r') do |ios|
+ ios.each do |entry|
+ if entry.seq_name =~ /([^:]+):\[(\d+),(\d+)\]/
+ id = $1.to_i
+ start = $2.to_i - 1
+ stop = $3.to_i - 1
+
+ if stop > start
+ strand = '+'
+ else
+ start, stop = stop, start
+ strand = '-'
+ end
+
+ results[id.to_i] << Match.new(start, stop, strand, entry.seq)
+ else
+ raise "Failed to parse seq_name: #{entry.seq_name} in patscan result"
+ end
+ end
+ end
+
+ results
+ end
+
+ private
+
+ # Method to save a patscan pattern to a file.
+ def pattern_save
+ File.open(@pat_file, "w") do |ios|
+ ios.puts @pattern
+ end
+ end
+
+ # Subclass to define Patscan hits.
+ class Match
+ attr_reader :start, :stop, :strand, :match
+
+ def initialize(start, stop, strand, match)
+ @start = start
+ @stop = stop
+ @strand = strand
+ @match = match
+ end
+
+ def length
+ @stop - @start + 1
+ end
+ end
+end
+
+casts = []
+casts << {:long=>'pattern', :short=>'p', :type=>'string', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'inline', :short=>'i', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'comp', :short=>'c', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'max_hits', :short=>'h', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>'0'}
+casts << {:long=>'max_misses', :short=>'m', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>'0'}
+
+options = Biopieces.options_parse(ARGV, casts)
+
+tmp_dir = Biopieces.mktmpdir
+tmp_file = File.join(tmp_dir, "tmp.stream")
+pat_file = File.join(tmp_dir, "pat.txt")
+in_file = File.join(tmp_dir, "in.fna")
+out_file = File.join(tmp_dir, "out.fna")
+out_file = File.join(tmp_dir, "out.fna")
+
+seq_name_count = 0
+seq_name_hash = {}
+seq_type = nil
+
+Biopieces.open(options[:stream_in], tmp_file) do |input, output|
+ Fasta.open(in_file, mode="w") do |fasta_io|
+ input.each_record do |record|
+ if record[:SEQ_NAME]
+ seq_name_hash[seq_name_count] = record[:SEQ_NAME]
+ record[:SEQ_NAME] = seq_name_count
+ seq_name_count += 1
+
+ seq = Seq.new_bp(record)
+
+ if seq_type.nil?
+ seq_type = seq.type_guess
+ end
+
+ fasta_io.puts seq.to_fasta
+ end
+
+ output.puts record
+ end
+ end
+end
+
+patscan = Patscan.new(pat_file, in_file, out_file, options[:pattern])
+patscan.run(options[:comp], seq_type, options[:max_hits], options[:max_misses], options[:verbose])
+results = patscan.results_parse
+
+Biopieces.open(tmp_file, options[:stream_out]) do |input, output|
+ input.each_record do |record|
+ if record[:SEQ_NAME]
+ key = record[:SEQ_NAME].to_i
+ record[:SEQ_NAME] = seq_name_hash[key]
+
+ if options[:inline]
+ if results[key]
+ results[key].each do |result|
+ record[:PATTERN] = options[:pattern]
+ record[:MATCH] = result.match
+ record[:S_BEG] = result.start
+ record[:S_END] = result.stop
+ record[:STRAND] = result.strand
+ record[:MATCH_LEN] = result.length
+
+ output.puts record
+ end
+ else
+ output.puts record
+ end
+ else
+ output.puts record
+
+ new_record = {}
+
+ results[key].each do |result|
+ new_record[:REC_TYPE] = "PATSCAN"
+ new_record[:S_ID] = record[:SEQ_NAME]
+ new_record[:Q_ID] = options[:pattern]
+ new_record[:MATCH] = result.match
+ new_record[:S_BEG] = result.start
+ new_record[:S_END] = result.stop
+ new_record[:STRAND] = result.strand
+ new_record[:SCORE] = 100
+ new_record[:MATCH_LEN] = result.length
+
+ output.puts new_record
+ end
+ end
+ else
+ output.puts record
+ end
+ end
+end
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__