5 # Biopieces are command line scripts and use OptionParser to parse command line
6 # options according to a list of casts. Each cast prescribes the long and short
7 # name of the option, the type, if it is mandatory, the default value, and allowed
8 # and disallowed values. An optional list of extra casts can be supplied, and the
9 # integrity of the casts is checked. Following the command line parsing, the
10 # options are checked according to the casts. Methods are also included for handling
11 # the parsing and emitting of Biopiece records, which are ASCII text records consisting
12 # of lines with a key/value pair separated by a colon and a white space ': '.
13 # Each record is separated by a line with three dashes '---'.
15 REGEX_LIST = /^(list|files|files!)$/
16 REGEX_INT = /^(int|uint)$/
17 REGEX_STRING = /^(string|file|file!|dir|dir!|genome)$/
19 # Initialize a Biopiece and write the status to file.
20 # Options are for testing purposes only.
21 def initialize(no_status=nil,input=STDIN,output=STDOUT)
22 status_set unless no_status
27 # Check the integrity of a list of casts, followed by parsing options from argv
28 # and finally checking the options according to the casts. Returns nil if
29 # argv is empty, otherwise an options hash.
30 def parse(argv,cast_list=[],script_path=$0)
31 casts = Casts.new(cast_list)
35 @script_path = script_path
39 options_template = OptionParser.new do |option|
41 if cast[:type] == 'flag'
42 option.on("-#{cast[:short]}", "--#{cast[:long]}") do |o|
43 @options[cast[:long]] = o
45 elsif cast[:type] =~ REGEX_LIST
46 option.on( "-#{cast[:short]}", "--#{cast[:long]} A", Array) do |a|
47 @options[cast[:long]] = a
49 elsif cast[:type] =~ REGEX_INT
50 option.on("-#{cast[:short]}", "--#{cast[:long]} I", Integer) do |i|
51 @options[cast[:long]] = i
53 elsif cast[:type] =~ REGEX_STRING
54 option.on("-#{cast[:short]}", "--#{cast[:long]} S", String) do |s|
55 @options[cast[:long]] = s
57 elsif cast[:type] == 'float'
58 option.on("-#{cast[:short]}", "--#{cast[:long]} F", Float) do |f|
59 @options[cast[:long]] = f
62 raise ArgumentError, "Unknown option type: '#{cast[:type]}'"
67 options_template.parse!(argv)
70 print_usage_and_exit(true)
71 return # break for unit testing.
72 elsif print_usage_short?
74 return # break for unit testing.
84 # Open Biopiece input stream if not open and iterate over all Biopiece
85 # records in the stream.
87 @in = stream_in_open unless @in.is_a? IO
91 @in.each_line do |line|
93 when /^([^:]+): (.*)$/
96 yield record unless record.empty?
99 raise "Bad record format: #{line}"
103 yield record unless record.empty?
105 self # conventionally
108 alias :each :each_record
110 # Open Biopiece output stream if not open and write the record to the stream.
112 @out = stream_out_open unless @out.is_a? IO
114 record.each do |key,value|
115 @out.print "#{key}: #{value}\n"
121 # Close Biopiece streams, remove tmp_dir, end log status.
123 @in.close if @in.respond_to? :close
124 @out.close if @out.respond_to? :close
125 # remove tmpdir if found
132 # Given the script name determine the path of the wiki file with the usage info.
134 path = ENV["BP_DIR"] + "/bp_usage/" + File.basename(@script_path, ".rb") + ".wiki"
135 raise "No such wiki file: #{path}" unless File.file? path
140 # Check if full "usage info" should be printed.
141 def print_usage_full?
145 # Check if short "usage info" should be printed.
146 def print_usage_short?
149 elsif @options["stream_in"]
151 elsif @options["data_in"]
153 elsif wiki_path =~ /^(list_biopieces|list_genomes|list_mysql_databases|biostat)$/ # TODO get rid of this!
160 # Print usage info by calling an external script 'print_wiki'
161 # using a system() call and exit. An optional 'full' flag
162 # outputs the full usage info.
163 def print_usage_and_exit(full=nil)
165 system("print_wiki --data_in #{wiki_path} --help")
167 system("print_wiki --data_in #{wiki_path}")
170 raise "Failed printing wiki: #{wiki_path}" unless $?.success?
175 # Set default option values from the casts unless a value is already set.
179 @options[cast[:long]] = cast[:default] unless @options.has_key? cast[:long]
184 # Expands glob expressions to a full list of paths.
185 # Examples: "*.fna" or "foo.fna,*.fna" or "foo.fna,/bar/*.fna"
188 if cast[:type] == 'files' or cast[:type] == 'files!'
189 if @options.has_key? cast[:long]
192 @options[cast[:long]].each do |path|
194 Dir.glob(path).each do |file|
195 files << file if File.file? file
202 @options[cast[:long]] = files
208 # Check all options according to casts.
211 options_check_mandatory(cast)
212 options_check_int(cast)
213 options_check_uint(cast)
214 options_check_file(cast)
215 options_check_files(cast)
216 options_check_dir(cast)
217 options_check_allowed(cast)
218 options_check_disallowed(cast)
222 # Check if a mandatory option is set and raise if it isn't.
223 def options_check_mandatory(cast)
225 raise ArgumentError, "Mandatory argument: --#{cast[:long]}" unless @options.has_key? cast[:long]
229 # Check int type option and raise if not an integer.
230 def options_check_int(cast)
231 if cast[:type] == 'int' and @options.has_key? cast[:long]
232 unless @options[cast[:long]].is_a? Integer
233 raise ArgumentError, "Argument to --#{cast[:long]} must be an integer, not '#{@options[cast[:long]]}'"
238 # Check uint type option and raise if not an unsigned integer.
239 def options_check_uint(cast)
240 if cast[:type] == 'uint' and @options.has_key? cast[:long]
241 unless @options[cast[:long]].is_a? Integer and @options[cast[:long]] >= 0
242 raise ArgumentError, "Argument to --#{cast[:long]} must be an unsigned integer, not '#{@options[cast[:long]]}'"
247 # Check file! type argument and raise if the file doesn't exist.
248 def options_check_file(cast)
249 if cast[:type] == 'file!' and @options.has_key? cast[:long]
250 raise ArgumentError, "No such file: '#{@options[cast[:long]]}'" unless File.file? @options[cast[:long]]
254 # Check files! type argument and raise if any of the files doesn't exist.
255 def options_check_files(cast)
256 if cast[:type] == 'files!' and @options.has_key? cast[:long]
257 @options[cast[:long]].each do |path|
258 raise ArgumentError, "No such file: '#{path}'" unless File.file? path
263 # Check dir! type argument and raise if the directory doesn't exist.
264 def options_check_dir(cast)
265 if cast[:type] == 'dir!' and @options.has_key? cast[:long]
266 raise ArgumentError, "No such directory: '#{@options[cast[:long]]}'" unless File.directory? @options[cast[:long]]
270 # Check options and raise unless allowed.
271 def options_check_allowed(cast)
272 if cast[:allowed] and @options.has_key? cast[:long]
274 cast[:allowed].split(',').each { |a| allowed_hash[a] = 1 }
276 raise ArgumentError, "Argument '#{@options[cast[:long]]}' to --#{cast[:long]} not allowed" unless allowed_hash.has_key? @options[cast[:long]]
280 # Check disallowed argument values and raise if disallowed.
281 def options_check_disallowed(cast)
282 if cast[:disallowed] and @options.has_key? cast[:long]
283 cast[:disallowed].split(',').each do |val|
284 raise ArgumentError, "Argument '#{@options[cast[:long]]}' to --#{cast[:long]} is disallowed" if val == @options[cast[:long]]
289 # Open Biopieces input data stream for reading from either
290 # stdin or from a list of files specified in options["stream_in"].
296 stream = read(@options["stream_in"])
302 # Open Biopieces output data stream for writing to stdout
303 # or a file specified in options["stream_out"].
305 if @options["stream_out"]
306 stream = write(@options["stream_out"], @options["compress"])
315 # Opens a read stream to a list of files.
318 stream = zread(files)
320 stream = nread(files)
326 # Opens a write stream to a file and returns a _io_ object.
327 def write(file, zip=nil)
328 zip ? zwrite(file) : nwrite(file)
331 # Test if a list of files are gzipped or not.
332 # Raises if files are mixed zipped and unzipped.
337 type = `file #{file}`
339 if type =~ /gzip compressed/
340 type_hash[:gzip] = true
342 type_hash[:ascii] = true
346 raise "Mixture of zipped and unzipped files" if type_hash.size == 2
351 # Opens a list of gzipped files for reading and returns an _io_ object.
353 stdin, stdout, stderr = Open3.popen3("zcat " + files.join(' '));
359 # Opens a file for gzipped writing and returns an _io_ object.
361 stdin, stdout, stderr = Open3.popen3("gzip -f > #{file}")
367 # Opens a list of files for reading and returns an _io_ object.
369 stdin, stdout, stderr = Open3.popen3("cat " + files.join(' '));
375 # Opens a file for writing and returns an _io_ object.
377 File.open(file, mode="w")
380 # Write the status to a status file.
383 time = now.strftime("%Y-%m-%d %X")
385 script = File.basename($0, ".rb")
387 path = ENV["BP_TMP"] + "/" + [user, script, pid, "status"].join(".")
389 File.open(path, mode="w") { |file| file.puts [time, ARGV.join(" ")].join(";") }
392 # Write the Biopiece status to the log file.
393 def status_log(status="OK")
395 time1 = now.strftime("%Y-%m-%d %X")
397 script = File.basename($0, ".rb")
399 path = ENV["BP_TMP"] + "/" + [user, script, pid, "status"].join(".")
401 stream = File.open(path)
402 time0, args, tmp_dir = stream.first.split(";")
404 # Dir.rmdir(tmp_dir) unless tmp_dir.nil? and File.directory? tmp_dir #TODO fix this!
406 elap = time_diff(time0, time1)
407 command = [script, args].join(" ")
408 log_file = ENV["BP_LOG"] + "/biopieces.log"
410 File.open(log_file, mode="a") { |file| file.puts [time0, time1, elap, user, status, command].join("\t") }
413 # Get the elapsed time from the difference between two time stamps.
414 def time_diff(t0, t1)
415 t0 =~ /(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)/
423 sec0 += day0 * 24 * 60 * 60
424 sec0 += hour0 * 60 * 60
427 t1 =~ /(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)/
435 sec1 += day1 * 24 * 60 * 60
436 sec1 += hour1 * 60 * 60
445 hour = ( sec / ( 60 * 60 ) ).to_i
446 sec -= hour * 60 * 60
448 min = ( sec / 60 ).to_i
451 [sprintf("%02d", hour), sprintf("%02d", min), sprintf("%02d", sec)].join(":")
455 # Error class for all exceptions to do with option casts.
456 class CastError < StandardError
459 # Class to handle casts of command line options. Each cast prescribes the long and
460 # short name of the option, the type, if it is mandatory, the default value, and
461 # allowed and disallowed values. An optional list of extra casts can be supplied,
462 # and the integrity of the casts is checked.
464 TYPES = %w[flag string list int uint float file file! files files! dir dir! genome]
465 MANDATORY = %w[long short type mandatory default allowed disallowed]
467 def initialize(cast_list=[])
468 @cast_list = cast_list
474 # Add ubiquitous options casts.
476 @cast_list << {:long => 'help', :short => '?', :type => 'flag', :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil}
477 @cast_list << {:long => 'stream_in', :short => 'I', :type => 'files!', :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil}
478 @cast_list << {:long => 'stream_out', :short => 'O', :type => 'file', :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil}
479 @cast_list << {:long => 'verbose', :short => 'v', :type => 'flag', :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil}
482 # Check integrity of the casts.
489 # Check if all mandatory keys are present in casts and raise if not.
491 @cast_list.each do |cast|
492 MANDATORY.each do |mandatory|
493 raise CastError, "Missing symbol in cast: '#{mandatory.to_sym}'" unless cast.has_key? mandatory.to_sym
498 # Check if all values in casts are valid.
500 @cast_list.each do |cast|
502 check_val_short(cast)
504 check_val_mandatory(cast)
505 check_val_default(cast)
506 check_val_allowed(cast)
507 check_val_disallowed(cast)
511 # Check if the values to long are legal and raise if not.
512 def check_val_long(cast)
513 unless cast[:long].is_a? String and cast[:long].length > 1
514 raise CastError, "Illegal cast of long: '#{cast[:long]}'"
518 # Check if the values to short are legal and raise if not.
519 def check_val_short(cast)
520 unless cast[:short].is_a? String and cast[:short].length == 1
521 raise CastError, "Illegal cast of short: '#{cast[:short]}'"
525 # Check if values to type are legal and raise if not.
526 def check_val_type(cast)
529 type_hash[type] = true
532 unless type_hash.has_key? cast[:type]
533 raise CastError, "Illegal cast of type: '#{cast[:type]}'"
537 # Check if values to mandatory are legal and raise if not.
538 def check_val_mandatory(cast)
539 unless cast[:mandatory] == true or cast[:mandatory] == false
540 raise CastError, "Illegal cast of mandatory: '#{cast[:mandatory]}'"
544 # Check if values to default are legal and raise if not.
545 def check_val_default(cast)
546 unless cast[:default].nil? or
547 cast[:default].is_a? String or
548 cast[:default].is_a? Integer or
549 cast[:default].is_a? Float
550 raise CastError, "Illegal cast of default: '#{cast[:default]}'"
554 # Check if values to allowed are legal and raise if not.
555 def check_val_allowed(cast)
556 unless cast[:allowed].is_a? String or cast[:allowed].nil?
557 raise CastError, "Illegal cast of allowed: '#{cast[:allowed]}'"
561 # Check if values to disallowed are legal and raise if not.
562 def check_val_disallowed(cast)
563 unless cast[:disallowed].is_a? String or cast[:disallowed].nil?
564 raise CastError, "Illegal cast of disallowed: '#{cast[:disallowed]}'"
568 # Check casts for duplicate long or short option names.
571 @cast_list.each do |cast|
572 raise CastError, "Duplicate argument: '--#{cast[:long]}'" if check_hash.has_key? cast[:long]
573 raise CastError, "Duplicate argument: '-#{cast[:short]}'" if check_hash.has_key? cast[:short]
574 check_hash[cast[:long]] = true
575 check_hash[cast[:short]] = true