+require 'fileutils'
+require 'date'
require 'optparse'
require 'open3'
require 'pp'
# of lines with a key/value pair seperated by a colon and a white space ': '.
# Each record is separated by a line with three dashes '---'.
class Biopieces
- REGEX_LIST = /^(list|files|files!)$/
- REGEX_INT = /^(int|uint)$/
- REGEX_STRING = /^(string|file|file!|dir|dir!|genome)$/
-
# Initialize a Biopiece and write the status to file.
# Options are for testing purposes only.
- def initialize(no_status=nil,input=STDIN,output=STDOUT)
- status_set unless no_status
+ # test   - truthy to run in test mode: no status file is written here, and
+ #          the flag is passed on to OptionHandler by #parse.
+ # input  - stream handed to Stream::open as the fallback for reading records.
+ # output - stream handed to Stream::open as the fallback for writing records.
+ def initialize(test=nil, input=STDIN, output=STDOUT)
+ @test = test
@input = input
@output = output
+ # The Status object is always created (mktmpdir uses it), but the status
+ # file is only written outside test mode.
+ @status = Status.new
+ @status.set unless @test
end
# Check the integrity of a list of casts, followed by parsion options from argv
- # and finally checking the options according to the casts. Returns nil if
- # argv is empty, otherwise an options hash.
- def parse(argv,cast_list=[],script_path=$0)
- casts = Casts.new(cast_list)
-
- pp casts
+ # and finally checking the options according to the casts. Returns nil if argv
+ # is empty, otherwise an options hash.
+  def parse(argv, cast_list=[], script_path=$0)
+    # Validate the casts first so that a malformed cast list raises before
+    # any command line parsing takes place.
+    validated_casts = Casts.new(cast_list)
+
+    # The parsed options are remembered for the stream helpers.
+    @options = OptionHandler.new(argv, validated_casts, script_path, @test).options_parse
+  end
+
+ # Open Biopiece input stream if not open and iterate over all Biopiece
+ # records in the stream.
+  # Yields each record as a Hash mapping String keys to String values and
+  # returns self. Called without a block, an Enumerator over the records is
+  # returned instead and nothing is read until it is consumed.
+  #
+  # Raises RuntimeError on a line that is neither 'key: value' nor '---'.
+  def each_record
+    return enum_for(:each_record) unless block_given?
+
+    @in = Stream::open(@options, "r", @input) unless @in.is_a? IO
+
+    record = {}
+
+    @in.each_line do |line|
+      case line
+      when /^([^:]+): (.*)$/
+        record[$1] = $2
+      when /^---$/
+        yield record unless record.empty?
+        record = {}
+      else
+        raise "Bad record format: #{line}"
+      end
+    end
+
+    # Flush a trailing record that was not terminated by '---'.
+    yield record unless record.empty?
+
+    self # conventionally
+  end
+
+ alias :each :each_record
+
+ # Open Biopiece output stream if not open and puts record to the stream.
+  # record - key/value pairs to emit, one 'key: value' line per pair,
+  #          followed by the '---' record separator.
+  def puts(record)
+    @out = Stream::open(@options, "w", @output) unless @out.is_a? IO
+
+    record.each { |field, content| @out.print "#{field}: #{content}\n" }
+
+    @out.print "---\n"
+  end
+
+ # Create a temporary directory inside the ENV["BP_TMP"] dir.
+  # Returns the path of the new directory, which is also recorded in the
+  # status file through @status.
+  def mktmpdir
+    pid      = $$
+    stamp    = Time.now.to_i + pid
+    dir_name = [ENV["USER"], stamp, pid, "bp_tmp"].join("_")
+    tmp_path = ENV["BP_TMP"] + "/" + dir_name
+
+    Dir.mkdir(tmp_path)
+    @status.set_tmpdir(tmp_path)
+    tmp_path
+  end
+end
+
+
+# Error class for all exceptions to do with option casts.
+class CastError < StandardError
+end
+
+
+# Class to handle casts of command line options. Each cast prescribes the long and
+# short name of the option, the type, if it is mandatory, the default value, and
+# allowed and disallowed values. An optional list of extra casts can be supplied,
+# and the integrity of the casts are checked.
+class Casts < Array
+  TYPES     = %w[flag string list int uint float file file! files files! dir dir! genome]
+  MANDATORY = %w[long short type mandatory default allowed disallowed]
+
+  # Initialize cast object with an optional options cast list to which
+  # ubiquitous casts are added after which all casts are checked.
+  #
+  # cast_list - Array of cast Hashes keyed by the symbols named in MANDATORY.
+  #             The array is copied first, so the caller's list is not
+  #             modified and may be reused for another Casts object.
+  #
+  # Raises CastError if a cast lacks a key, holds an illegal value, or if a
+  # long or short option name occurs twice.
+  def initialize(cast_list=[])
+    super()
+    @cast_list = cast_list.dup
+    ubiquitous
+    check
+    concat(@cast_list)
+  end
+
+  private
+
+  # Add ubiquitous options casts.
+  def ubiquitous
+    [
+      ['help',       '?', 'flag'],
+      ['stream_in',  'I', 'files!'],
+      ['stream_out', 'O', 'file'],
+      ['verbose',    'v', 'flag']
+    ].each do |long, short, type|
+      @cast_list << {:long => long, :short => short, :type => type, :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil}
+    end
+  end
+
+  # Check integrity of the casts.
+  def check
+    check_keys
+    check_values
+    check_duplicates
+  end
+
+  # Check if all mandatory keys are present in casts and raise if not.
+  def check_keys
+    @cast_list.each do |cast|
+      MANDATORY.each do |mandatory|
+        raise CastError, "Missing symbol in cast: '#{mandatory.to_sym}'" unless cast.has_key? mandatory.to_sym
+      end
+    end
+  end
+
+  # Check if all values in casts are valid.
+  def check_values
+    @cast_list.each do |cast|
+      check_val_long(cast)
+      check_val_short(cast)
+      check_val_type(cast)
+      check_val_mandatory(cast)
+      check_val_default(cast)
+      check_val_allowed(cast)
+      check_val_disallowed(cast)
+    end
+  end
+
+  # Check if the values to long are legal and raise if not.
+  # A long name is a String of more than one character.
+  def check_val_long(cast)
+    unless cast[:long].is_a?(String) && cast[:long].length > 1
+      raise CastError, "Illegal cast of long: '#{cast[:long]}'"
+    end
+  end
+
+  # Check if the values to short are legal and raise if not.
+  # A short name is a String of exactly one character.
+  def check_val_short(cast)
+    unless cast[:short].is_a?(String) && cast[:short].length == 1
+      raise CastError, "Illegal cast of short: '#{cast[:short]}'"
+    end
+  end
+
+  # Check if values to type are legal and raise if not.
+  def check_val_type(cast)
+    unless TYPES.include? cast[:type]
+      raise CastError, "Illegal cast of type: '#{cast[:type]}'"
+    end
+  end
+
+  # Check if values to mandatory are legal and raise if not.
+  # Only the literal values true and false are accepted.
+  def check_val_mandatory(cast)
+    unless cast[:mandatory] == true || cast[:mandatory] == false
+      raise CastError, "Illegal cast of mandatory: '#{cast[:mandatory]}'"
+    end
+  end
+
+  # Check if values to default are legal and raise if not.
+  # A default is nil, a String, an Integer or a Float.
+  def check_val_default(cast)
+    value = cast[:default]
+
+    unless value.nil? || value.is_a?(String) || value.is_a?(Integer) || value.is_a?(Float)
+      raise CastError, "Illegal cast of default: '#{value}'"
+    end
+  end
+
+  # Check if values to allowed are legal and raise if not.
+  def check_val_allowed(cast)
+    unless cast[:allowed].is_a?(String) || cast[:allowed].nil?
+      raise CastError, "Illegal cast of allowed: '#{cast[:allowed]}'"
+    end
+  end
+
+  # Check if values to disallowed are legal and raise if not.
+  def check_val_disallowed(cast)
+    unless cast[:disallowed].is_a?(String) || cast[:disallowed].nil?
+      raise CastError, "Illegal cast of disallowed: '#{cast[:disallowed]}'"
+    end
+  end
+
+  # Check cast for duplicate long or short options names.
+  def check_duplicates
+    check_hash = {}
+    @cast_list.each do |cast|
+      raise CastError, "Duplicate argument: '--#{cast[:long]}'" if check_hash.has_key? cast[:long]
+      raise CastError, "Duplicate argument: '-#{cast[:short]}'" if check_hash.has_key? cast[:short]
+      check_hash[cast[:long]]  = true
+      check_hash[cast[:short]] = true
+    end
+  end
+end
+
+# Class for parsing argv using OptionParser according to given casts.
+# Default options are set, file glob expressions expanded, and options are
+# checked according to the casts. Usage information is printed and exit called
+# if required.
+class OptionHandler
+ REGEX_LIST = /^(list|files|files!)$/
+ REGEX_INT = /^(int|uint)$/
+ REGEX_STRING = /^(string|file|file!|dir|dir!|genome)$/
+
+ # argv        - command line arguments; options_parse hands them to OptionParser#parse!.
+ # casts       - collection of cast Hashes iterated when declaring and defaulting options.
+ # script_path - path of the running script; its basename selects the usage wiki file.
+ # test        - truthy in test mode, where print_usage_and_exit returns
+ #               without printing or exiting.
+ def initialize(argv, casts, script_path, test=nil)
+ @argv = argv
+ @casts = casts
@script_path = script_path
+ @test = test
+ end
+ # Parse options from argv using OptionParser and casts denoting long and
+ # short option names. Usage information is printed and exit called.
+ # A hash with options is returned.
+ def options_parse
@options = {}
- options_template = OptionParser.new do |option|
- casts.each do |cast|
+ option_parser = OptionParser.new do |option|
+ @casts.each do |cast|
if cast[:type] == 'flag'
option.on("-#{cast[:short]}", "--#{cast[:long]}") do |o|
@options[cast[:long]] = o
end
end
- options_template.parse!(argv)
+ option_parser.parse!(@argv)
if print_usage_full?
print_usage_and_exit(true)
- return # break for unit testing.
elsif print_usage_short?
print_usage_and_exit
- return # break for unit testing.
end
options_default
@options
end
- # Open Biopiece input stream if not open and iterate over all Biopiece
- # records in the stream.
- def each_record
- @in = stream_in_open unless @in.is_a? IO
-
- record = {}
-
- @in.each_line do |line|
- case line
- when /^([^:]+): (.*)$/
- record[$1] = $2
- when /^---$/
- yield record unless record.empty?
- record = {}
- else
- raise "Bad record format: #{line}"
- end
- end
-
- yield record unless record.empty?
-
- self # conventionally
- end
-
- alias :each :each_record
-
- # Open Biopiece output stream if not open and puts record to the stream.
- def puts(record)
- @out = stream_out_open unless @out.is_a? IO
-
- record.each do |key,value|
- @out.print "#{key}: #{value}\n"
- end
-
- @out.print "---\n"
- end
-
- # Close Biopiece streams, remove tmp_dir, end log status.
- def clean
- @in.close if @in.respond_to? :close
- @out.close if @out.respond_to? :close
- # remove tmpdir if found
- status_log
- # remove status file
- end
-
- private
-
# Given the script name determine the path of the wiki file with the usage info.
def wiki_path
- path = ENV["BP_DIR"] + "/bp_usage/" + File.basename(@script_path, ".rb") + ".wiki"
+ path = ENV["BP_DIR"] + "/bp_usage/" + File.basename(@script_path) + ".wiki"
raise "No such wiki file: #{path}" unless File.file? path
-
path
end
# using a system() call and exit. An optional 'full' flag
# outputs the full usage info.
def print_usage_and_exit(full=nil)
- if full
- system("print_wiki --data_in #{wiki_path} --help")
+ if @test
+ return
else
- system("print_wiki --data_in #{wiki_path}")
- end
+ if full
+ system("print_wiki --data_in #{wiki_path} --help")
+ else
+ system("print_wiki --data_in #{wiki_path}")
+ end
- raise "Failed printing wiki: #{wiki_path}" unless $?.success?
+ raise "Failed printing wiki: #{wiki_path}" unless $?.success?
- exit
+ exit
+ end
end
# Set default options value from cast unless a value is set.
def options_default
- casts.each do |cast|
+ @casts.each do |cast|
if cast[:default]
@options[cast[:long]] = cast[:default] unless @options.has_key? cast[:long]
end
# Expands glob expressions to a full list of paths.
# Examples: "*.fna" or "foo.fna,*.fna" or "foo.fna,/bar/*.fna"
def options_glob
- casts.each do |cast|
+ @casts.each do |cast|
if cast[:type] == 'files' or cast[:type] == 'files!'
if @options.has_key? cast[:long]
files = []
# Check all options according to casts.
def options_check
- casts.each do |cast|
+ @casts.each do |cast|
options_check_mandatory(cast)
options_check_int(cast)
options_check_uint(cast)
end
end
end
+end
- # Open Biopieces input data stream for reading from either
- # stdin or from a list of files specified in options["stream_in"].
- def stream_in_open
- if not $stdin.tty?
- p "IN"
- stream = @input
- else
- stream = read(@options["stream_in"])
- end
+# Class for manipulating the execution status of Biopieces by setting a
+# status file with a time stamp, process id, and command arguments. The
+# status file is used for creating log entries and for displaying the
+# runtime status of Biopieces.
+class Status
+ # Write the status to a status file.
+ def set
+ # Start time stamp; #log reads it back as the first field of the file.
+ time0 = Time.new.strftime("%Y-%m-%d %X")
- stream
+ # One line: "<start time>;<command arguments>". Mode "w" truncates any
+ # existing status file at this path.
+ File.open(path, mode="w") { |file| file.puts [time0, ARGV.join(" ")].join(";") }
end
- # Open Biopieces output data stream for writing to stdout
- # or a file specified in options["stream_out"].
- def stream_out_open
- if @options["stream_out"]
- stream = write(@options["stream_out"], @options["compress"])
- else
- p "OUT"
- stream = @output
+ # Append the path of a temporary directory to the status file.
+  # tmp_path - path of the temporary directory to record as the last
+  #            ';' separated field of the status line.
+  def set_tmpdir(tmp_path)
+    # File.read opens, reads and closes the file in one call, so no file
+    # handle is left open.
+    status = File.read(path).chomp
+
+    File.open(path, "w") do |fh|
+      fh << [status, tmp_path].join(";") + "\n"
end
+  end
- stream
+ # Extract the temporary directory path from the status file,
+ # and return this or nil if not found.
+  def get_tmpdir
+    fields = File.read(path).chomp.split(";")
+
+    # A status line is "<time>;<args>" until set_tmpdir appends a third
+    # field. With fewer than three fields the last one is the time stamp or
+    # the command arguments and must not be mistaken for a directory.
+    # NOTE(review): arguments that themselves contain ';' still add fields;
+    # telling those apart would need a different file format.
+    return nil if fields.size < 3
+
+    tmp_path = fields.last
+    File.directory?(tmp_path) ? tmp_path : nil
end
- # Opens a reads stream to a list of files.
- def read(files)
- if zipped?(files)
- stream = zread(files)
- else
- stream = nread(files)
- end
+ # Write the Biopiece status to the log file.
+ # exit_status - value written as the fifth column of the log entry.
+ #
+ # Appends one tab separated line (start time, end time, elapsed time, user,
+ # exit status, command) to ENV["BP_LOG"]/biopieces.log.
+ def log(exit_status)
+ time1 = Time.new.strftime("%Y-%m-%d %X")
+ user = ENV["USER"]
+ script = File.basename($0)
+
+ # First line of the status file: "<start time>;<args>[;<tmp dir>]".
+ # tmp_dir is read here but not used further in this method.
+ stream = File.open(path)
+ time0, args, tmp_dir = stream.first.split(";")
+ stream.close
- stream
+ elap = time_diff(time0, time1)
+ command = [script, args].join(" ")
+ log_file = ENV["BP_LOG"] + "/biopieces.log"
+
+ File.open(log_file, mode="a") { |file| file.puts [time0, time1, elap, user, exit_status, command].join("\t") }
end
- # Opens a write stream to a file and returns a _io_ object.
- def write(file, zip=nil)
- zip ? zwrite(file) : nwrite(file)
+ # Delete status file.
+ def delete
+ # File.delete raises a SystemCallError (e.g. Errno::ENOENT) when the
+ # status file does not exist.
+ File.delete(path)
end
+
+ private
- # Test if a list of files are gzipped or not.
- # Raises if files are mixed zipped and unzipped.
- def zipped?(files)
- type_hash = {}
+ # Path to status file
+  # Built as ENV["BP_TMP"]/<user>.<script>.<pid>.status
+  def path
+    file_name = [ENV["USER"], File.basename($0), $$, "status"].join(".")
+    ENV["BP_TMP"] + "/" + file_name
+  end
- files.each do |file|
- type = `file #{file}`
+ # Get the elapsed time from the difference between two time stamps.
+  # t0, t1 - time stamps parseable by DateTime.parse, e.g. "2011-01-31 13:05:59".
+  #
+  # Returns the elapsed time from t0 to t1 as "HH:MM:SS". Hours are not
+  # wrapped at 24, so a run of 25 hours yields "25:00:00"; a negative
+  # difference is reported as "00:00:00".
+  def time_diff(t0, t1)
+    secs = (DateTime.parse(t1).to_time - DateTime.parse(t0).to_time).to_i
+    secs = 0 if secs < 0
+
+    hours, rest = secs.divmod(3600)
+    mins, secs  = rest.divmod(60)
+
+    format("%02d:%02d:%02d", hours, mins, secs)
+  end
+end
- if type =~ /gzip compressed/
- type_hash[:gzip] = true
- else
- type_hash[:ascii] = true
- end
+
+class Stream < IO
+ # Open a Biopieces data stream: either for reading from stdin or from the
+ # files specified in options["stream_in"], or for writing to stdout or to
+ # the file specified in options["stream_out"].
+ # options - Hash of parsed options; the keys "stream_in", "stream_out" and
+ #           "compress" are read.
+ # mode    - "r" or "w"; any other value raises a RuntimeError.
+ # stdio   - stream returned as is when no file is to be opened.
+ # NOTE(review): this replaces the inherited IO.open for Stream with a
+ # different signature.
+ def self.open(options, mode, stdio)
+ if mode == "r"
+ # When $stdin is not a terminal stdio is used, otherwise the files
+ # given with stream_in are opened.
+ $stdin.tty? ? read(options["stream_in"]) : stdio
+ elsif mode == "w"
+ options["stream_out"] ? self.write(options["stream_out"], options["compress"]) : stdio
+ else
+ raise "Bad mode #{mode}"
end
+ end
- raise "Mixture of zipped and unzipped files" if type_hash.size == 2
+ private
- type_hash[:gzip]
+ # Opens a read stream to a list of files.
+  # files - list of file paths; read through zread when zipped? reports
+  #         them as gzipped, otherwise through nread.
+  def self.read(files)
+    if self.zipped?(files)
+      self.zread(files)
+    else
+      self.nread(files)
+    end
+  end
+
+ # Opens a write stream to a file and returns a _io_ object.
+ # file - path of the file to write to.
+ # zip  - truthy to write through zwrite, otherwise nwrite is used.
+ def self.write(file, zip=nil)
+ zip ? self.zwrite(file) : self.nwrite(file)
end
# Opens a list of gzipped files for reading and return an _io_ object.
- def zread(files)
+ def self.zread(files)
stdin, stdout, stderr = Open3.popen3("zcat " + files.join(' '));
stdin.close
stderr.close
end
# Opens a file for gzipped writing and return an _io_ object.
- def zwrite(file)
+ def self.zwrite(file)
stdin, stdout, stderr = Open3.popen3("gzip -f > #{file}")
stderr.close
stdout.close
end
# Opens a list of files for reading and return an _io_ object.
- def nread(files)
+ def self.nread(files)
stdin, stdout, stderr = Open3.popen3("cat " + files.join(' '));
stdin.close
stderr.close
end
# Opens a file for writing and return an _io_ object.
- def nwrite(file)
+ def self.nwrite(file)
File.open(file, mode="w")
end
- # Write the status to a status file.
- def status_set
- now = Time.new
- time = now.strftime("%Y-%m-%d %X")
- user = ENV["USER"]
- script = File.basename($0, ".rb")
- pid = $$
- path = ENV["BP_TMP"] + "/" + [user, script, pid, "status"].join(".")
-
- File.open(path, mode="w") { |file| file.puts [time, ARGV.join(" ")].join(";") }
- end
-
- # Write the Biopiece status to the log file.
- def status_log(status="OK")
- now = Time.new
- time1 = now.strftime("%Y-%m-%d %X")
- user = ENV["USER"]
- script = File.basename($0, ".rb")
- pid = $$
- path = ENV["BP_TMP"] + "/" + [user, script, pid, "status"].join(".")
-
- stream = File.open(path)
- time0, args, tmp_dir = stream.first.split(";")
-
- # Dir.rmdir(tmp_dir) unless tmp_dir.nil? and File.directory? tmp_dir #TODO fix this!
-
- elap = time_diff(time0, time1)
- command = [script, args].join(" ")
- log_file = ENV["BP_LOG"] + "/biopieces.log"
-
- File.open(log_file, mode="a") { |file| file.puts [time0, time1, elap, user, status, command].join("\t") }
- end
-
- # Get the elapsed time from the difference between two time stamps.
- def time_diff(t0, t1)
- t0 =~ /(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)/
- year0 = $1.to_i
- mon0 = $2.to_i
- day0 = $3.to_i
- hour0 = $4.to_i
- min0 = $5.to_i
- sec0 = $6.to_i
-
- sec0 += day0 * 24 * 60 * 60
- sec0 += hour0 * 60 * 60
- sec0 += min0 * 60
-
- t1 =~ /(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)/
- year1 = $1.to_i
- mon1 = $2.to_i
- day1 = $3.to_i
- hour1 = $4.to_i
- min1 = $5.to_i
- sec1 = $6.to_i
-
- sec1 += day1 * 24 * 60 * 60
- sec1 += hour1 * 60 * 60
- sec1 += min1 * 60
-
- year = year1 - year0
- mon = mon1 - mon0
- day = day1 - day0
-
- sec = sec1 - sec0
-
- hour = ( sec / ( 60 * 60 ) ).to_i
- sec -= hour * 60 * 60
-
- min = ( sec / 60 ).to_i
- sec -= min * 60
-
- [sprintf("%02d", hour), sprintf("%02d", min), sprintf("%02d", sec)].join(":")
- end
-end
-
-# Error class for all exceptions to do with option casts.
-class CastError < StandardError
-end
-
-# Class to handle casts of command line options. Each cast prescribes the long and
-# short name of the option, the type, if it is mandatory, the default value, and
-# allowed and disallowed values. An optional list of extra casts can be supplied,
-# and the integrity of the casts are checked.
-class Casts < Array
- TYPES = %w[flag string list int uint float file file! files files! dir dir! genome]
- MANDATORY = %w[long short type mandatory default allowed disallowed]
-
- def initialize(cast_list=[])
- @cast_list = cast_list
- ubiquitous
- check
- self << @cast_list
- end
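+ # Test whether the files in a list are gzipped or not.
+ # Raises if files are mixed zipped and unzipped.
+ # NOTE(review): each file name is interpolated unquoted into a `file` shell
+ # command, so names with spaces or shell metacharacters break the test or
+ # run unintended commands - consider Shellwords.escape.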
+ def self.zipped?(files)
+ type_hash = {}
- # Add ubiquitous options casts.
- def ubiquitous
- @cast_list << {:long => 'help', :short => '?', :type => 'flag', :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil}
- @cast_list << {:long => 'stream_in', :short => 'I', :type => 'files!', :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil}
- @cast_list << {:long => 'stream_out', :short => 'O', :type => 'file', :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil}
- @cast_list << {:long => 'verbose', :short => 'v', :type => 'flag', :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil}
- end
+ files.each do |file|
+ type = `file #{file}`
- # Check integrity of the casts.
- def check
- check_keys
- check_values
- check_duplicates
- end
-
- # Check if all mandatory keys are present in casts and raise if not.
- def check_keys
- @cast_list.each do |cast|
- MANDATORY.each do |mandatory|
- raise CastError, "Missing symbol in cast: '#{mandatory.to_sym}'" unless cast.has_key? mandatory.to_sym
+ if type =~ /gzip compressed/
+ type_hash[:gzip] = true
+ else
+ type_hash[:ascii] = true
end
end
- end
- # Check if all values in casts are valid.
- def check_values
- @cast_list.each do |cast|
- check_val_long(cast)
- check_val_short(cast)
- check_val_type(cast)
- check_val_mandatory(cast)
- check_val_default(cast)
- check_val_allowed(cast)
- check_val_disallowed(cast)
- end
- end
+ raise "Mixture of zipped and unzipped files" if type_hash.size == 2
- # Check if the values to long are legal and raise if not.
- def check_val_long(cast)
- unless cast[:long].is_a? String and cast[:long].length > 1
- raise CastError, "Illegal cast of long: '#{cast[:long]}'"
- end
- end
-
- # Check if the values to short are legal and raise if not.
- def check_val_short(cast)
- unless cast[:short].is_a? String and cast[:short].length == 1
- raise CastError, "Illegal cast of short: '#{cast[:short]}'"
- end
+ type_hash[:gzip]
end
+end
- # Check if values to type are legal and raise if not.
- def check_val_type(cast)
- type_hash = {}
- TYPES.each do |type|
- type_hash[type] = true
- end
- unless type_hash.has_key? cast[:type]
- raise CastError, "Illegal cast of type: '#{cast[:type]}'"
- end
- end
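+# Things to do at startup: write the status file, install handlers for the
+# INT, TERM and QUIT signals that call exit, and return the initial exit
+# status "OK".
+# NOTE(review): a handler reassigns only the local exit_status of this method,
+# after "OK" has already been returned, so the caller never sees the signal name.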
+def do_at_begin
+ status = Status.new
+ status.set
+ exit_status = "OK"
- # Check if values to mandatory are legal and raise if not.
- def check_val_mandatory(cast)
- unless cast[:mandatory] == true or cast[:mandatory] == false
- raise CastError, "Illegal cast of mandatory: '#{cast[:mandatory]}'"
+ # Install signal handlers
+ %w( INT TERM QUIT ).each do |signal|
+ Signal.trap(signal) do
+ exit_status = signal
+ exit
end
end
- # Check if values to default are legal and raise if not.
- def check_val_default(cast)
- unless cast[:default].nil? or
- cast[:default].is_a? String or
- cast[:default].is_a? Integer or
- cast[:default].is_a? Float
- raise CastError, "Illegal cast of default: '#{cast[:default]}'"
- end
- end
+ exit_status
+end
- # Check if values to allowed are legal and raise if not.
- def check_val_allowed(cast)
- unless cast[:allowed].is_a? String or cast[:allowed].nil?
- raise CastError, "Illegal cast of allowed: '#{cast[:allowed]}'"
+# Make sure that file streams are closed, tmpdir removed, and
+# exit status is written to log file.
+# exit_status - status text written to the log entry; replaced by
+#               'INTERRUPTED', 'TERMINATED' or "DIED" only when closing the
+#               streams raises.
+def do_at_exit(exit_status="OK-martin")
+  at_exit do
+    begin
+      @in.close if @in.respond_to? :close # FIXME: This can never work
+      @out.close if @out.respond_to? :close # FIXME: Same
+    # NOTE(review): rescuing Exception also catches SystemExit and signals;
+    # kept as is because this runs as a last-chance exit handler.
+    rescue Exception => exception
+      $stderr.puts "Exception caught!" # DEBUG
+      case exit_status
+      when 'INT' then exit_status = 'INTERRUPTED'
+      when 'TERM' then exit_status = 'TERMINATED'
+      end
+      exit_status = "DIED" unless exit_status =~ /INT|TERM|QUIT/
+      $stderr.puts exit_status
+      $stderr.puts exception.backtrace
+    ensure
+      status  = Status.new
+      tmp_dir = status.get_tmpdir
+      # get_tmpdir returns nil when no temporary directory was recorded;
+      # passing nil on would raise here and skip the log entry and the
+      # status file removal below.
+      FileUtils.remove_entry_secure(tmp_dir) if tmp_dir
+      status.log(exit_status)
+      status.delete
+      puts "DEBUG EXIT STATUS: #{exit_status}" # DEBUG
end
end
+end
- # Check if values to disallowed are legal and raise if not.
- def check_val_disallowed(cast)
- unless cast[:disallowed].is_a? String or cast[:disallowed].nil?
- raise CastError, "Illegal cast of disallowed: '#{cast[:disallowed]}'"
- end
- end
+exit_status = do_at_begin
- # Check cast for duplicate long or short options names.
- def check_duplicates
- check_hash = {}
- @cast_list.each do |cast|
- raise CastError, "Duplicate argument: '--#{cast[:long]}'" if check_hash.has_key? cast[:long]
- raise CastError, "Duplicate argument: '-#{cast[:short]}'" if check_hash.has_key? cast[:short]
- check_hash[cast[:long]] = true
- check_hash[cast[:short]] = true
- end
- end
-end
+END {
+ do_at_exit(exit_status)
+}
__END__