raise "Ruby 1.9 or later required" if RUBY_VERSION < "1.9"
-# Copyright (C) 2007-2010 Martin A. Hansen.
+# Copyright (C) 2007-2011 Martin A. Hansen.
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-require 'fileutils'
require 'date'
+require 'fileutils'
require 'optparse'
-require 'open3'
require 'pp'
-require 'maasha/filesys'
+require 'stringio'
+require 'zlib'
+
+TEST = false
+
+# Monkey patch (evil) changing the to_s method of the Hash class to
+# return the Hash in Biopieces format: one key/value pair per line,
+# each separated by ': ', and terminated by a line of '---'.
+class Hash
+ def to_s
+ string = ""
+
+ self.each do |key, value|
+ string << "#{key.to_s}: #{value}\n"
+ end
+
+ string << "---\n"
+
+ string
+ end
+end
+
+# Error class for all exceptions to do with the Biopieces class.
+class BiopiecesError < StandardError; end
# Biopieces are command line scripts and uses OptionParser to parse command line
# options according to a list of casts. Each cast prescribes the long and short
# the parsing and emitting of Biopiece records, which are ASCII text records consisting
# of lines with a key/value pair separated by a colon and a white space ': '.
# Each record is separated by a line with three dashes '---'.
-class Biopieces < Filesys
+class Biopieces
include Enumerable
- attr_accessor :out # accessor for out stream _ios_
+ # Class method to check the integrity of a list of casts, followed by parsing
+ # options from argv and finally checking the options according to the casts.
+ def self.options_parse(argv, cast_list=[], script_path=$0)
+ casts = Casts.new(cast_list)
+ option_handler = OptionHandler.new(argv, casts, script_path)
+ options = option_handler.options_parse
- # Initialize a Biopiece and write the status to file.
- # Options are for testing purposes only.
- def initialize(test=nil, input=STDIN, output=STDOUT)
- @test = test
- @input = input
- @output = output
+ options
end
- # Check the integrity of a list of casts, followed by parsion options from argv
- # and finally checking the options according to the casts. Returns nil if argv
- # is empty, otherwise an options hash.
- def parse(argv, cast_list=[], script_path=$0)
- casts = Casts.new(cast_list)
- option_handler = OptionHandler.new(argv, casts, script_path, @test)
- @options = option_handler.options_parse
+ # Class method for opening data streams for reading and writing Biopiece
+ # records. Records are read from STDIN (default) or file (possibly gzipped)
+ # and written to STDOUT (default) or file.
+ def self.open(input = STDIN, output = STDOUT)
+ io_in = self.open_input(input)
+ io_out = self.open_output(output)
- @in = Stream.open(@options, mode="r", @input)
+ if block_given?
+ begin
+ yield io_in, io_out
+ ensure
+ io_in.close
+ io_out.close
+ end
+ else
+ return io_in, io_out
+ end
+ end
- @options
+ # Class method to create a temporary directory inside the ENV["BP_TMP"] directory.
+ def self.mktmpdir
+ time = Time.now.to_i
+ user = ENV["USER"]
+ pid = $$
+ path = File.join(ENV["BP_TMP"], [user, time + pid, pid, "bp_tmp"].join("_"))
+ Dir.mkdir(path)
+ Status.new.set_tmpdir(path)
+ path
end
- # Open Biopiece input stream if not open and iterate over all Biopiece
- # records in the stream.
+ # Initialize a Biopiece object for either reading or writing from _ios_.
+ def initialize(ios, stdio = nil)
+ @ios = ios
+ @stdio = stdio
+ end
+
+ # Method to write a Biopiece record to _ios_.
+ def puts(foo)
+ @ios << foo.to_s
+ end
+
+ # Method to close _ios_.
+ def close
+ @ios.close unless @stdio
+ end
+
+ # Method to parse and yield a Biopiece record from _ios_.
def each_record
- return if @in.nil?
-
+ while record = get_entry
+ yield record
+ end
+
+ self # conventionally
+ end
+
+ alias :each :each_record
+
+ def get_entry
record = {}
- @in.each_line do |line|
+ @ios.each_line do |line|
case line
when /^([^:]+): (.*)$/
record[$1.to_sym] = $2
when /^---$/
- yield record unless record.empty?
- record = {}
+ break
else
- raise "Bad record format: #{line}"
+ raise BiopiecesError, "Bad record format: #{line}"
end
end
- yield record unless record.empty?
-
- self # conventionally
+ return record unless record.empty?
end
- alias :each :each_record
+ alias :get_record :each_entry
+
+ private
- # Open Biopiece output stream if not open and puts record to the stream.
- def puts(record)
- @out = Stream.open(@options, mode="w", @output) unless @out.is_a? IO
+ # Class method for opening data stream for reading Biopiece records.
+ # Records are read from STDIN (default) or file (possibly gzipped).
+ def self.open_input(input)
+ if input.nil?
+ if STDIN.tty?
+ input = self.new(StringIO.new)
+ else
+ input = self.new(STDIN, true)
+ end
+ elsif File.exists? input
+ ios = File.open(input, 'r')
+
+ begin
+ ios = Zlib::GzipReader.new(ios)
+ rescue
+ ios.rewind
+ end
- record.each do |key,value|
- @out.print "#{key.to_s}: #{value}\n"
+ input = self.new(ios)
end
- @out.print "---\n"
+ input
end
- def to_s
- end
+ # Class method for opening data stream for writing Biopiece records.
+ # Records are written to STDOUT (default) or file.
+ def self.open_output(output)
+ if output.nil?
+ output = self.new(STDOUT, true)
+ elsif not output.is_a? IO
+ output = self.new(File.open(output, 'w'))
+ end
- # Create a temporary directory inside the ENV["BP_TMP"] dir.
- def mktmpdir
- time = Time.now.to_i
- user = ENV["USER"]
- pid = $$
- path = ENV["BP_TMP"] + "/" + [user, time + pid, pid, "bp_tmp"].join("_")
- Dir.mkdir(path)
- Status.new.set_tmpdir(path)
- path
+ output
end
end
# Error class for all exceptions to do with option casts.
class CastError < StandardError; end
-
# Class to handle casts of command line options. Each cast prescribes the long and
# short name of the option, the type, if it is mandatory, the default value, and
# allowed and disallowed values. An optional list of extra casts can be supplied,
# Add ubiquitous options casts.
def ubiquitous
- @cast_list << {:long=>'help', :short=>'?', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
- @cast_list << {:long=>'stream_in', :short=>'I', :type=>'files!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
- @cast_list << {:long=>'stream_out', :short=>'O', :type=>'file', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
- @cast_list << {:long=>'verbose', :short=>'v', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+ @cast_list << {:long=>'help', :short=>'?', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+ @cast_list << {:long=>'stream_in', :short=>'I', :type=>'file!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+ @cast_list << {:long=>'stream_out', :short=>'O', :type=>'file', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+ @cast_list << {:long=>'verbose', :short=>'v', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
end
# Check integrity of the casts.
cast[:long] = cast[:long].to_sym
end
end
-
end
REGEX_INT = /^(int|uint)$/
REGEX_STRING = /^(file|file!|dir|dir!|genome)$/
- def initialize(argv, casts, script_path, test=nil)
+ def initialize(argv, casts, script_path)
@argv = argv
@casts = casts
@script_path = script_path
- @test = test
+ @options = {} # filled by options_parse, keyed by each cast's :long name
end
# Parse options from argv using OptionParser and casts denoting long and
# short option names. Usage information is printed and exit called.
# A hash with options is returned.
def options_parse
- @options = {}
-
option_parser = OptionParser.new do |option|
@casts.each do |cast|
case cast[:type]
end
when 'string'
option.on("-#{cast[:short]}", "--#{cast[:long]} S", String) do |s|
- @options[cast[:long]] = s.to_sym # TODO: this to_sym - is that needed?
+ @options[cast[:long]] = s
end
when REGEX_LIST
option.on( "-#{cast[:short]}", "--#{cast[:long]} A", Array) do |a|
# Check if short "usage info" should be printed.
def print_usage_short?
- if not $stdin.tty?
+ if not STDIN.tty?
return false
elsif @options[:stream_in]
return false
# using a system() call and exit. An optional 'full' flag
# outputs the full usage info.
def print_usage_and_exit(full=nil)
- if @test
+ if TEST
return
else
if full
def set
time0 = Time.new.strftime("%Y-%m-%d %X")
- File.open(path, mode="w") do |fh|
+ File.open(path, "w") do |fh|
+ fh.flock(File::LOCK_EX) # exclusive lock while the start time and ARGV are written
fh.puts [time0, ARGV.join(" ")].join(";")
end
end
def set_tmpdir(tmpdir_path)
status = ""
- File.open(path, mode="r") do |fh|
+ File.open(path, "r") do |fh|
+ fh.flock(File::LOCK_SH) # shared lock while the current status line is read
status = fh.read.chomp
end
status = "#{status};#{tmpdir_path}\n"
- File.open(path, mode="w") do |fh|
+ File.open(path, "w") do |fh|
+ fh.flock(File::LOCK_EX) # NOTE(review): mode "w" truncates on open, i.e. before this lock is held
fh << status
end
end
# Extract the temporary directory path from the status file,
# and return this or nil if not found.
def get_tmpdir
- File.open(path, mode="r") do |fh|
+ File.open(path, "r") do |fh|
+ fh.flock(File::LOCK_SH)
tmpdir_path = fh.read.chomp.split(";").last
return tmpdir_path if File.directory?(tmpdir_path)
end
user = ENV["USER"]
script = File.basename($0)
- stream = File.open(path)
- time0, args, tmp_dir = stream.first.split(";")
- stream.close
+ time0 = nil
+ args = nil
+
+ File.open(path, "r") do |fh|
+ fh.flock(File::LOCK_SH)
+ time0, args = fh.first.split(";")
+ end
elap = time_diff(time0, time1)
command = [script, args].join(" ")
- log_file = ENV["BP_LOG"] + "/biopieces.log"
+ log_file = File.join(ENV["BP_LOG"], "biopieces.log")
- File.open(log_file, mode = "a") { |file| file.puts [time0, time1, elap, user, exit_status, command].join("\t") }
+ File.open(log_file, "a") do |fh|
+ fh.flock(File::LOCK_EX)
+ fh.puts [time0, time1, elap, user, exit_status, command].join("\t")
+ end
end
# Delete status file.
user = ENV["USER"]
script = File.basename($0)
pid = $$
- path = ENV["BP_TMP"] + "/" + [user, script, pid, "status"].join(".")
+ path = File.join(ENV["BP_TMP"], [user, script, pid, "status"].join("."))
+
+ path
end
# Get the elapsed time from the difference between two time stamps.
end
-class Stream < IO
- # Open Biopieces output data stream for reading from stdin or a file
- # specified in options[:stream_in] OR writing to stdout or a file
- # specified in options[:stream_out] or options[:data_out].
- def self.open(options, mode, stdio)
- if mode == "r"
- if options[:data_in] and options[:data_in].first == "-"
- self.nread(["-"])
- else
- $stdin.tty? ? read(options[:stream_in]) : stdio
- end
- elsif mode == "w"
- options[:stream_out] ? self.write(options[:stream_out], options[:compress]) : stdio
- else
- raise "Bad mode #{mode}"
- end
- end
-
- private
-
- # Opens a reads stream to a list of files.
- def self.read(files)
- return if files.nil? #TODO case/when
- self.zipped?(files) ? self.zread(files) : self.nread(files)
- end
-
- # Opens a write stream to a file and returns a _io_ object.
- def self.write(file, zip=nil)
- zip ? self.zwrite(file) : self.nwrite(file)
- end
-
- # Opens a list of gzipped files for reading and return an _io_ object.
- def self.zread(files)
- stdin, stdout, stderr = Open3.popen3("zcat " + files.join(' '));
- stdin.close
- stderr.close
- stdout
- end
-
- # Opens a file for gzipped writing and return an _io_ object.
- def self.zwrite(file)
- stdin, stdout, stderr = Open3.popen3("gzip -f > #{file}")
- stderr.close
- stdout.close
- stdin
- end
-
- # Opens a list of files for reading and return an _io_ object.
- def self.nread(files)
- stdin, stdout, stderr = Open3.popen3("cat " + files.join(' '));
- stdin.close
- stderr.close
- stdout
- end
-
- # Opens a file for writing and return an _io_ object.
- def self.nwrite(file)
- File.open(file, mode="w")
- end
-
- # Test if a list of files are gzipped or not.
- # Raises if files are mixed zipped and unzipped.
- def self.zipped?(files)
- type_hash = {}
-
- files.each do |file|
- type = `file #{file}`
-
- if type =~ /gzip compressed/
- type_hash[:gzip] = true
- else
- type_hash[:ascii] = true
- end
- end
-
- raise "Mixture of zipped and unzipped files" if type_hash.size == 2
-
- type_hash[:gzip]
- end
-end
-
# Set status when 'biopieces' is required.
Status.new.set
status = Status.new
tmpdir = status.get_tmpdir
- FileUtils.remove_entry_secure(tmpdir) unless tmpdir.nil?
+ FileUtils.remove_entry_secure(tmpdir, true) unless tmpdir.nil?
status.log(exit_status)
status.delete
end
__END__
+