X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Fusearch_seq;h=946659f0c165652d32f2c39259ef8074e9484be8;hb=bdcd4e373c9a3361781e5f171d2d60166c46dc3d;hp=9bd3d46469b5fb9aacd910356a4e6335c0d83464;hpb=8c572d5e8bf807c65a7b08383dab3b644aa14842;p=biopieces.git diff --git a/bp_bin/usearch_seq b/bp_bin/usearch_seq index 9bd3d46..946659f 100755 --- a/bp_bin/usearch_seq +++ b/bp_bin/usearch_seq @@ -30,76 +30,24 @@ require 'maasha/biopieces' require 'maasha/fasta' - -class Usearch - include Enumerable - - def initialize(infile, outfile, options) - @infile = infile - @outfile = outfile - @options = options - @command = [] - end - - # Method to execute database search. - def usearch - @command << "usearch --query #{@infile} --db #{@options[:database]} --userout #{@outfile}" - @command << "--userfields target+tloz+thiz+query+bits+strand" - @command << "--id #{@options[:identity]}" if @options.has_key? :identity - @command << "--evalue #{@options[:e_val]}" if @options.has_key? :e_val - - execute - end - - # Method to parse a Useach .uc file and for each line of data - # yield a Biopiece record. - def each - record = {} - - File.open(@outfile, mode="r") do |ios| - ios.gets # skip comment line - ios.each_line do |line| - fields = line.chomp.split("\t") - - record[:REC_TYPE] = "USEARCH" - record[:S_ID] = fields[0] - record[:S_BEG] = fields[1].to_i - record[:S_END] = fields[2].to_i - record[:Q_ID] = fields[3] - record[:SCORE] = fields[4].to_f - record[:STRAND] = fields[5] - - yield record - end - end - - self # conventionally - end - - private - - # Method to execute a command using a system() call. - # The command is composed of bits from the @command variable. - def execute - @command.unshift "nice -n 19" - @command << "--rev" - @command << "> /dev/null 2>&1" unless @options[:verbose] - command = @command.join(" ") - system(command) - raise "Command failed: #{command}" unless $?.success? - - @command = [] - end -end +require 'maasha/usearch' casts = [] -casts << {:long=>'database', :short=>'d', :type=>'file!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} -casts << {:long=>'identity', :short=>'i', :type=>'float', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} -casts << {:long=>'e_val', :short=>'e', :type=>'float', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {long: 'program', short: 'p', type: 'string', mandatory: false, default: 'global', allowed: "local,global", disallowed: nil} +casts << {long: 'database', short: 'd', type: 'file!', mandatory: true, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'identity', short: 'i', type: 'float', mandatory: false, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'e_val', short: 'e', type: 'float', mandatory: false, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'cpus', short: 'c', type: 'uint', mandatory: false, default: 1, allowed: nil, disallowed: "0"} +casts << {long: 'maxaccepts', short: 'm', type: 'uint', mandatory: false, default: 0, allowed: nil, disallowed: nil} options = Biopieces.options_parse(ARGV, casts) -raise ArgumentError, "--identity or --e_val must be specified" unless options[:identity] or options[:e_val] +if options[:program] == 'global' + raise ArgumentError, "--identity be specified for global search" unless options[:identity] + raise ArgumentError, "--e_val is invalid for global search" if options[:e_val] +else + raise ArgumentError, "--identity or --e_val must be specified" unless options[:identity] or options[:e_val] +end tmpdir = Biopieces.mktmpdir infile = File.join(tmpdir, "in.fna") @@ -110,17 +58,21 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| input.each_record do |record| output.puts record - if record.has_key? :SEQ_NAME and record.has_key? :SEQ + if record[:SEQ_NAME] and record[:SEQ] fasta_io.puts Seq.new_bp(record).to_fasta end end end - uc = Usearch.new(infile, outfile, options) + us = Usearch.new(infile, outfile, options) - uc.usearch + case options[:program] + when "local" then us.usearch_local + when "global" then us.usearch_global + else raise "no such program #{options[:program]}" + end - uc.each do |record| + us.each_hit do |record| output.puts record end end