X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=code_ruby%2Flib%2Fmaasha%2Fbiopieces.rb;h=1797d9211ed4e16ddcc1101b86d1170437ae314f;hb=c4b49c5ce1ed3b46ae37e6ebd73c21d67d6d4810;hp=8ee242342dcb0f6198667f9005f2ca38b185990c;hpb=494dc53ebd515b1e3e9b91bbebf43059899ca4ce;p=biopieces.git diff --git a/code_ruby/lib/maasha/biopieces.rb b/code_ruby/lib/maasha/biopieces.rb index 8ee2423..1797d92 100644 --- a/code_ruby/lib/maasha/biopieces.rb +++ b/code_ruby/lib/maasha/biopieces.rb @@ -1,6 +1,6 @@ raise "Ruby 1.9 or later required" if RUBY_VERSION < "1.9" -# Copyright (C) 2007-2010 Martin A. Hansen. +# Copyright (C) 2007-2011 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -24,11 +24,34 @@ raise "Ruby 1.9 or later required" if RUBY_VERSION < "1.9" # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -require 'fileutils' require 'date' +require 'fileutils' require 'optparse' -require 'open3' require 'pp' +require 'stringio' +require 'zlib' + +TEST = false + +# Monkey patch (evil) changing the to_s method of the Hash class to +# return a Hash in Biopieces format; keys and value pairs on one line +# each seperated with ': ' and terminated by a line of '---'. +class Hash + def to_s + string = "" + + self.each do |key, value| + string << "#{key.to_s}: #{value}\n" + end + + string << "---\n" + + string + end +end + +# Error class for all exceptions to do with the Biopieces class. +class BiopiecesError < StandardError; end # Biopieces are command line scripts and uses OptionParser to parse command line # options according to a list of casts. Each cast prescribes the long and short @@ -42,75 +65,128 @@ require 'pp' class Biopieces include Enumerable - attr_accessor :out # accessor for out stream _ios_ + # Class method to check the integrity of a list of casts, followed by parsing + # options from argv and finally checking the options according to the casts. + def self.options_parse(argv, cast_list=[], script_path=$0) + casts = Casts.new(cast_list) + option_handler = OptionHandler.new(argv, casts, script_path) + options = option_handler.options_parse - # Initialize a Biopiece and write the status to file. - # Options are for testing purposes only. - def initialize(test=nil, input=STDIN, output=STDOUT) - @test = test - @input = input - @output = output + options end - # Check the integrity of a list of casts, followed by parsion options from argv - # and finally checking the options according to the casts. Returns nil if argv - # is empty, otherwise an options hash. - def parse(argv, cast_list=[], script_path=$0) - casts = Casts.new(cast_list) - option_handler = OptionHandler.new(argv, casts, script_path, @test) - @options = option_handler.options_parse + # Class method for opening data streams for reading and writing Biopiece + # records. Records are read from STDIN (default) or file (possibly gzipped) + # and written to STDOUT (default) or file. + def self.open(input = STDIN, output = STDOUT) + io_in = self.open_input(input) + io_out = self.open_output(output) + + if block_given? + begin + yield io_in, io_out + ensure + io_in.close + io_out.close + end + else + return io_in, io_out + end end - # Open Biopiece input stream if not open and iterate over all Biopiece - # records in the stream. + # Class method to create a temporary directory inside the ENV["BP_TMP"] directory. + def self.mktmpdir + time = Time.now.to_i + user = ENV["USER"] + pid = $$ + path = File.join(ENV["BP_TMP"], [user, time + pid, pid, "bp_tmp"].join("_")) + Dir.mkdir(path) + Status.new.set_tmpdir(path) + path + end + + # Initialize a Biopiece object for either reading or writing from _ios_. + def initialize(ios, stdio = nil) + @ios = ios + @stdio = stdio + end + + # Method to write a Biopiece record to _ios_. + def puts(foo) + @ios << foo.to_s + end + + # Method to close _ios_. + def close + @ios.close unless @stdio + end + + # Method to parse and yield a Biopiece record from _ios_. def each_record - @in = Stream::open(@options, mode="r", @input) unless @in.is_a? IO - return if @in.nil? + while record = get_entry + yield record + end + + self # conventionally + end + + alias :each :each_record + def get_entry record = {} - @in.each_line do |line| + @ios.each_line do |line| case line when /^([^:]+): (.*)$/ record[$1.to_sym] = $2 when /^---$/ - yield record unless record.empty? - record = {} + break else - raise "Bad record format: #{line}" + raise BiopiecesError, "Bad record format: #{line}" end end - yield record unless record.empty? - - self # conventionally + return record unless record.empty? end - alias :each :each_record + alias :get_record :each_entry + + private - # Open Biopiece output stream if not open and puts record to the stream. - def puts(record) - @out = Stream::open(@options, mode="w", @output) unless @out.is_a? IO + # Class method for opening data stream for reading Biopiece records. + # Records are read from STDIN (default) or file (possibly gzipped). + def self.open_input(input) + if input.nil? + if STDIN.tty? + input = self.new(StringIO.new) + else + input = self.new(STDIN, true) + end + elsif File.exists? input + ios = File.open(input, 'r') + + begin + ios = Zlib::GzipReader.new(ios) + rescue + ios.rewind + end - record.each do |key,value| - @out.print "#{key.to_s}: #{value}\n" + input = self.new(ios) end - @out.print "---\n" + input end - def to_s - end + # Class method for opening data stream for writing Biopiece records. + # Records are written to STDOUT (default) or file. + def self.open_output(output) + if output.nil? + output = self.new(STDOUT, true) + elsif not output.is_a? IO + output = self.new(File.open(output, 'w')) + end - # Create a temporary directory inside the ENV["BP_TMP"] dir. - def mktmpdir - time = Time.now.to_i - user = ENV["USER"] - pid = $$ - path = ENV["BP_TMP"] + "/" + [user, time + pid, pid, "bp_tmp"].join("_") - Dir.mkdir(path) - Status.new.set_tmpdir(path) - path + output end end @@ -118,7 +194,6 @@ end # Error class for all exceptions to do with option casts. class CastError < StandardError; end - # Class to handle casts of command line options. Each cast prescribes the long and # short name of the option, the type, if it is mandatory, the default value, and # allowed and disallowed values. An optional list of extra casts can be supplied, @@ -141,10 +216,10 @@ class Casts < Array # Add ubiquitous options casts. def ubiquitous - @cast_list << {:long=>'help', :short=>'?', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} - @cast_list << {:long=>'stream_in', :short=>'I', :type=>'files!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} - @cast_list << {:long=>'stream_out', :short=>'O', :type=>'file', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} - @cast_list << {:long=>'verbose', :short=>'v', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} + @cast_list << {:long=>'help', :short=>'?', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} + @cast_list << {:long=>'stream_in', :short=>'I', :type=>'file!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} + @cast_list << {:long=>'stream_out', :short=>'O', :type=>'file', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} + @cast_list << {:long=>'verbose', :short=>'v', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} end # Check integrity of the casts. @@ -158,7 +233,7 @@ class Casts < Array def check_keys @cast_list.each do |cast| MANDATORY.each do |mandatory| - raise CastError, "Missing symbol in cast: '#{mandatory.to_sym}'" unless cast.has_key? mandatory.to_sym + raise CastError, "Missing symbol in cast: '#{mandatory}'" unless cast.has_key? mandatory.to_sym end end end @@ -197,7 +272,7 @@ class Casts < Array type_hash[type] = true end - unless type_hash.has_key? cast[:type] + unless type_hash[cast[:type]] raise CastError, "Illegal cast of type: '#{cast[:type]}'" end end @@ -237,8 +312,8 @@ class Casts < Array def check_duplicates check_hash = {} @cast_list.each do |cast| - raise CastError, "Duplicate argument: '--#{cast[:long]}'" if check_hash.has_key? cast[:long] - raise CastError, "Duplicate argument: '-#{cast[:short]}'" if check_hash.has_key? cast[:short] + raise CastError, "Duplicate argument: '--#{cast[:long]}'" if check_hash[cast[:long]] + raise CastError, "Duplicate argument: '-#{cast[:short]}'" if check_hash[cast[:short]] check_hash[cast[:long]] = true check_hash[cast[:short]] = true end @@ -250,7 +325,6 @@ class Casts < Array cast[:long] = cast[:long].to_sym end end - end @@ -263,19 +337,17 @@ class OptionHandler REGEX_INT = /^(int|uint)$/ REGEX_STRING = /^(file|file!|dir|dir!|genome)$/ - def initialize(argv, casts, script_path, test=nil) + def initialize(argv, casts, script_path) @argv = argv @casts = casts @script_path = script_path - @test = test + @options = {} end # Parse options from argv using OptionParser and casts denoting long and # short option names. Usage information is printed and exit called. # A hash with options is returned. def options_parse - @options = {} - option_parser = OptionParser.new do |option| @casts.each do |cast| case cast[:type] @@ -289,7 +361,7 @@ class OptionHandler end when 'string' option.on("-#{cast[:short]}", "--#{cast[:long]} S", String) do |s| - @options[cast[:long]] = s.to_sym # TODO: this to_sym - is that needed? + @options[cast[:long]] = s end when REGEX_LIST option.on( "-#{cast[:short]}", "--#{cast[:long]} A", Array) do |a| @@ -326,7 +398,7 @@ class OptionHandler # Given the script name determine the path of the wiki file with the usage info. def wiki_path - path = ENV["BP_DIR"] + "/bp_usage/" + File.basename(@script_path) + ".wiki" + path = File.join(ENV["BP_DIR"], "bp_usage", File.basename(@script_path)) + ".wiki" raise "No such wiki file: #{path}" unless File.file? path path end @@ -338,7 +410,7 @@ class OptionHandler # Check if short "usage info" should be printed. def print_usage_short? - if not $stdin.tty? + if not STDIN.tty? return false elsif @options[:stream_in] return false @@ -355,7 +427,7 @@ class OptionHandler # using a system() call and exit. An optional 'full' flag # outputs the full usage info. def print_usage_and_exit(full=nil) - if @test + if TEST return else if full @@ -374,7 +446,7 @@ class OptionHandler def options_default @casts.each do |cast| if cast[:default] - unless @options.has_key? cast[:long] + unless @options[cast[:long]] if cast[:type] == 'list' @options[cast[:long]] = cast[:default].split ',' else @@ -390,7 +462,7 @@ class OptionHandler def options_glob @casts.each do |cast| if cast[:type] == 'files' or cast[:type] == 'files!' - if @options.has_key? cast[:long] + if @options[cast[:long]] files = [] @options[cast[:long]].each do |path| @@ -426,13 +498,13 @@ class OptionHandler # Check if a mandatory option is set and raise if it isn't. def options_check_mandatory(cast) if cast[:mandatory] - raise ArgumentError, "Mandatory argument: --#{cast[:long]}" unless @options.has_key? cast[:long] + raise ArgumentError, "Mandatory argument: --#{cast[:long]}" unless @options[cast[:long]] end end # Check int type option and raise if not an integer. def options_check_int(cast) - if cast[:type] == 'int' and @options.has_key? cast[:long] + if cast[:type] == 'int' and @options[cast[:long]] unless @options[cast[:long]].is_a? Integer raise ArgumentError, "Argument to --#{cast[:long]} must be an integer, not '#{@options[cast[:long]]}'" end @@ -441,7 +513,7 @@ class OptionHandler # Check uint type option and raise if not an unsinged integer. def options_check_uint(cast) - if cast[:type] == 'uint' and @options.has_key? cast[:long] + if cast[:type] == 'uint' and @options[cast[:long]] unless @options[cast[:long]].is_a? Integer and @options[cast[:long]] >= 0 raise ArgumentError, "Argument to --#{cast[:long]} must be an unsigned integer, not '#{@options[cast[:long]]}'" end @@ -450,14 +522,14 @@ class OptionHandler # Check file! type argument and raise if file don't exists. def options_check_file(cast) - if cast[:type] == 'file!' and @options.has_key? cast[:long] + if cast[:type] == 'file!' and @options[cast[:long]] raise ArgumentError, "No such file: '#{@options[cast[:long]]}'" unless File.file? @options[cast[:long]] end end # Check files! type argument and raise if files don't exists. def options_check_files(cast) - if cast[:type] == 'files!' and @options.has_key? cast[:long] + if cast[:type] == 'files!' and @options[cast[:long]] @options[cast[:long]].each do |path| next if path == "-" raise ArgumentError, "File not readable: '#{path}'" unless File.readable? path @@ -467,24 +539,24 @@ class OptionHandler # Check dir! type argument and raise if directory don't exist. def options_check_dir(cast) - if cast[:type] == 'dir!' and @options.has_key? cast[:long] + if cast[:type] == 'dir!' and @options[cast[:long]] raise ArgumentError, "No such directory: '#{@options[cast[:long]]}'" unless File.directory? @options[cast[:long]] end end # Check options and raise unless allowed. def options_check_allowed(cast) - if cast[:allowed] and @options.has_key? cast[:long] + if cast[:allowed] and @options[cast[:long]] allowed_hash = {} cast[:allowed].split(',').each { |a| allowed_hash[a.to_s] = 1 } - raise ArgumentError, "Argument '#{@options[cast[:long]]}' to --#{cast[:long]} not allowed" unless allowed_hash.has_key? @options[cast[:long]].to_s + raise ArgumentError, "Argument '#{@options[cast[:long]]}' to --#{cast[:long]} not allowed" unless allowed_hash[@options[cast[:long]].to_s] end end # Check disallowed argument values and raise if disallowed. def options_check_disallowed(cast) - if cast[:disallowed] and @options.has_key? cast[:long] + if cast[:disallowed] and @options[cast[:long]] cast[:disallowed].split(',').each do |val| raise ArgumentError, "Argument '#{@options[cast[:long]]}' to --#{cast[:long]} is disallowed" if val.to_s == @options[cast[:long]].to_s end @@ -501,7 +573,8 @@ class Status def set time0 = Time.new.strftime("%Y-%m-%d %X") - File.open(path, mode="w") do |fh| + File.open(path, "w") do |fh| + fh.flock(File::LOCK_EX) fh.puts [time0, ARGV.join(" ")].join(";") end end @@ -510,13 +583,15 @@ class Status def set_tmpdir(tmpdir_path) status = "" - File.open(path, mode="r") do |fh| + File.open(path, "r") do |fh| + fh.flock(File::LOCK_SH) status = fh.read.chomp end status = "#{status};#{tmpdir_path}\n" - File.open(path, mode="w") do |fh| + File.open(path, "w") do |fh| + fh.flock(File::LOCK_EX) fh << status end end @@ -524,7 +599,8 @@ class Status # Extract the temporary directory path from the status file, # and return this or nil if not found. def get_tmpdir - File.open(path, mode="r") do |fh| + File.open(path, "r") do |fh| + fh.flock(File::LOCK_SH) tmpdir_path = fh.read.chomp.split(";").last return tmpdir_path if File.directory?(tmpdir_path) end @@ -538,15 +614,22 @@ class Status user = ENV["USER"] script = File.basename($0) - stream = File.open(path) - time0, args, tmp_dir = stream.first.split(";") - stream.close + time0 = nil + args = nil + + File.open(path, "r") do |fh| + fh.flock(File::LOCK_SH) + time0, args = fh.first.split(";") + end elap = time_diff(time0, time1) command = [script, args].join(" ") - log_file = ENV["BP_LOG"] + "/biopieces.log" + log_file = File.join(ENV["BP_LOG"], "biopieces.log") - File.open(log_file, mode = "a") { |file| file.puts [time0, time1, elap, user, exit_status, command].join("\t") } + File.open(log_file, "a") do |fh| + fh.flock(File::LOCK_EX) + fh.puts [time0, time1, elap, user, exit_status, command].join("\t") + end end # Delete status file. @@ -561,7 +644,9 @@ class Status user = ENV["USER"] script = File.basename($0) pid = $$ - path = ENV["BP_TMP"] + "/" + [user, script, pid, "status"].join(".") + path = File.join(ENV["BP_TMP"], [user, script, pid, "status"].join(".")) + + path end # Get the elapsed time from the difference between two time stamps. @@ -571,90 +656,10 @@ class Status end -class Stream < IO - # Open Biopieces output data stream for reading from stdin or a file - # specified in options[:stream_in] OR writing to stdout or a file - # specified in options[:stream_out] or options[:data_out]. - def self.open(options, mode, stdio) - if mode == "r" - if options[:data_in] and options[:data_in].first == "-" - self.nread(["-"]) - else - $stdin.tty? ? read(options[:stream_in]) : stdio - end - elsif mode == "w" - options[:stream_out] ? self.write(options[:stream_out], options[:compress]) : stdio - else - raise "Bad mode #{mode}" - end - end - - private - - # Opens a reads stream to a list of files. - def self.read(files) - return if files.nil? #TODO case/when - self.zipped?(files) ? self.zread(files) : self.nread(files) - end - - # Opens a write stream to a file and returns a _io_ object. - def self.write(file, zip=nil) - zip ? self.zwrite(file) : self.nwrite(file) - end - - # Opens a list of gzipped files for reading and return an _io_ object. - def self.zread(files) - stdin, stdout, stderr = Open3.popen3("zcat " + files.join(' ')); - stdin.close - stderr.close - stdout - end - - # Opens a file for gzipped writing and return an _io_ object. - def self.zwrite(file) - stdin, stdout, stderr = Open3.popen3("gzip -f > #{file}") - stderr.close - stdout.close - stdin - end - - # Opens a list of files for reading and return an _io_ object. - def self.nread(files) - stdin, stdout, stderr = Open3.popen3("cat " + files.join(' ')); - stdin.close - stderr.close - stdout - end - - # Opens a file for writing and return an _io_ object. - def self.nwrite(file) - File.open(file, mode="w") - end - - # Test if a list of files are gzipped or not. - # Raises if files are mixed zipped and unzipped. - def self.zipped?(files) - type_hash = {} - - files.each do |file| - type = `file #{file}` - - if type =~ /gzip compressed/ - type_hash[:gzip] = true - else - type_hash[:ascii] = true - end - end - - raise "Mixture of zipped and unzipped files" if type_hash.size == 2 - - type_hash[:gzip] - end -end - - +# Set status when 'biopieces' is required. Status.new.set +# Clean up when 'biopieces' exists. at_exit do exit_status = $! ? $!.inspect : "OK" @@ -671,10 +676,11 @@ at_exit do status = Status.new tmpdir = status.get_tmpdir - FileUtils.remove_entry_secure(tmpdir) unless tmpdir.nil? + FileUtils.remove_entry_secure(tmpdir, true) unless tmpdir.nil? status.log(exit_status) status.delete end __END__ +