1 raise "Ruby 1.9 or later required" if RUBY_VERSION < "1.9"
3 # Copyright (C) 2007-2010 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
21 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
23 # This software is part of the Biopieces framework (www.biopieces.org).
25 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
32 require 'maasha/filesys'
34 # Biopieces are command line scripts that use OptionParser to parse command line
35 # options according to a list of casts. Each cast prescribes the long and short
36 # name of the option, the type, if it is mandatory, the default value, and allowed
37 # and disallowed values. An optional list of extra casts can be supplied, and the
38 # integrity of the casts are checked. Following the command line parsing, the
39 # options are checked according to the casts. Methods are also included for handling
40 # the parsing and emitting of Biopiece records, which are ASCII text records consisting
41 # of lines with a key/value pair separated by a colon and a white space ': '.
42 # Each record is separated by a line with three dashes '---'.
43 class Biopieces < Filesys
46 attr_accessor :out # accessor for out stream _ios_
48 # Initialize a Biopiece and write the status to file.
49 # Options are for testing purposes only.
50 def initialize(test=nil, input=STDIN, output=STDOUT)
56 # Check the integrity of a list of casts, followed by parsing options from argv
57 # and finally checking the options according to the casts. Returns nil if argv
58 # is empty, otherwise an options hash.
59 def parse(argv, cast_list=[], script_path=$0)
60 casts = Casts.new(cast_list)
61 option_handler = OptionHandler.new(argv, casts, script_path, @test)
62 @options = option_handler.options_parse
64 @in = Stream.open(@options, mode="r", @input)
69 # Open Biopiece input stream if not open and iterate over all Biopiece
70 # records in the stream.
76 @in.each_line do |line|
78 when /^([^:]+): (.*)$/
79 record[$1.to_sym] = $2
81 yield record unless record.empty?
84 raise "Bad record format: #{line}"
88 yield record unless record.empty?
93 alias :each :each_record
95 # Open Biopiece output stream if not open and puts record to the stream.
97 @out = Stream.open(@options, mode="w", @output) unless @out.is_a? IO
99 record.each do |key,value|
100 @out.print "#{key.to_s}: #{value}\n"
109 # Create a temporary directory inside the ENV["BP_TMP"] dir.
114 path = ENV["BP_TMP"] + "/" + [user, time + pid, pid, "bp_tmp"].join("_")
116 Status.new.set_tmpdir(path)
# Exception type used for every problem detected in option casts.
class CastError < StandardError
end
126 # Class to handle casts of command line options. Each cast prescribes the long and
127 # short name of the option, the type, if it is mandatory, the default value, and
128 # allowed and disallowed values. An optional list of extra casts can be supplied,
129 # and the integrity of the casts are checked.
# Option types a cast may declare in its :type field.
# Frozen so the shared list cannot be modified by accident at runtime.
TYPES = %w[flag string list int uint float file file! files files! dir dir! genome].freeze
# Keys every cast must provide; they are converted to symbols when the
# casts are checked. Frozen for the same reason as TYPES.
MANDATORY = %w[long short type mandatory default allowed disallowed].freeze
134 # Initialize cast object with an optional options cast list to which
135 # ubiquitous casts are added after which all casts are checked.
136 def initialize(cast_list=[])
137 @cast_list = cast_list
141 self.push(*@cast_list)
146 # Add ubiquitous options casts.
148 @cast_list << {:long=>'help', :short=>'?', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
149 @cast_list << {:long=>'stream_in', :short=>'I', :type=>'files!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
150 @cast_list << {:long=>'stream_out', :short=>'O', :type=>'file', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
151 @cast_list << {:long=>'verbose', :short=>'v', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
154 # Check integrity of the casts.
161 # Check if all mandatory keys are present in casts and raise if not.
163 @cast_list.each do |cast|
164 MANDATORY.each do |mandatory|
165 raise CastError, "Missing symbol in cast: '#{mandatory.to_sym}'" unless cast.has_key? mandatory.to_sym
170 # Check if all values in casts are valid.
172 @cast_list.each do |cast|
174 check_val_short(cast)
176 check_val_mandatory(cast)
177 check_val_default(cast)
178 check_val_allowed(cast)
179 check_val_disallowed(cast)
# Validate the :long entry of a cast.
# Raises CastError unless it is a String longer than one character;
# returns nil otherwise.
def check_val_long(cast)
  name = cast[:long]
  return if name.is_a?(String) && name.length > 1

  raise CastError, "Illegal cast of long: '#{cast[:long]}'"
end
# Validate the :short entry of a cast.
# Raises CastError unless it is a String of exactly one character;
# returns nil otherwise.
def check_val_short(cast)
  letter = cast[:short]
  return if letter.is_a?(String) && letter.length == 1

  raise CastError, "Illegal cast of short: '#{cast[:short]}'"
end
197 # Check if values to type are legal and raise if not.
198 def check_val_type(cast)
201 type_hash[type] = true
204 unless type_hash.has_key? cast[:type]
205 raise CastError, "Illegal cast of type: '#{cast[:type]}'"
# Validate the :mandatory entry of a cast.
# Raises CastError unless it equals true or false; returns nil otherwise.
def check_val_mandatory(cast)
  flag = cast[:mandatory]
  return if flag == true || flag == false

  raise CastError, "Illegal cast of mandatory: '#{cast[:mandatory]}'"
end
# Validate the :default entry of a cast.
# Accepts nil, String, Integer and Float values (returning nil);
# anything else raises CastError.
def check_val_default(cast)
  case cast[:default]
  when nil, String, Integer, Float
    nil
  else
    raise CastError, "Illegal cast of default: '#{cast[:default]}'"
  end
end
# Validate the :allowed entry of a cast.
# Raises CastError unless it is nil or a String; returns nil otherwise.
def check_val_allowed(cast)
  allowed = cast[:allowed]
  return if allowed.nil? || allowed.is_a?(String)

  raise CastError, "Illegal cast of allowed: '#{cast[:allowed]}'"
end
# Validate the :disallowed entry of a cast.
# Raises CastError unless it is nil or a String; returns nil otherwise.
def check_val_disallowed(cast)
  disallowed = cast[:disallowed]
  return if disallowed.nil? || disallowed.is_a?(String)

  raise CastError, "Illegal cast of disallowed: '#{cast[:disallowed]}'"
end
240 # Check cast for duplicate long or short options names.
243 @cast_list.each do |cast|
244 raise CastError, "Duplicate argument: '--#{cast[:long]}'" if check_hash.has_key? cast[:long]
245 raise CastError, "Duplicate argument: '-#{cast[:short]}'" if check_hash.has_key? cast[:short]
246 check_hash[cast[:long]] = true
247 check_hash[cast[:short]] = true
251 # Convert values to :long keys to symbols for all casts.
253 @cast_list.each do |cast|
254 cast[:long] = cast[:long].to_sym
261 # Class for parsing argv using OptionParser according to given casts.
262 # Default options are set, file glob expressions expanded, and options are
263 # checked according to the casts. Usage information is printed and exit called
266 REGEX_LIST = /^(list|files|files!)$/
267 REGEX_INT = /^(int|uint)$/
268 REGEX_STRING = /^(file|file!|dir|dir!|genome)$/
270 def initialize(argv, casts, script_path, test=nil)
273 @script_path = script_path
277 # Parse options from argv using OptionParser and casts denoting long and
278 # short option names. Usage information is printed and exit called.
279 # A hash with options is returned.
283 option_parser = OptionParser.new do |option|
284 @casts.each do |cast|
287 option.on("-#{cast[:short]}", "--#{cast[:long]}") do |o|
288 @options[cast[:long]] = o
291 option.on("-#{cast[:short]}", "--#{cast[:long]} F", Float) do |f|
292 @options[cast[:long]] = f
295 option.on("-#{cast[:short]}", "--#{cast[:long]} S", String) do |s|
296 @options[cast[:long]] = s.to_sym # TODO: this to_sym - is that needed?
299 option.on( "-#{cast[:short]}", "--#{cast[:long]} A", Array) do |a|
300 @options[cast[:long]] = a
303 option.on("-#{cast[:short]}", "--#{cast[:long]} I", Integer) do |i|
304 @options[cast[:long]] = i
307 option.on("-#{cast[:short]}", "--#{cast[:long]} S", String) do |s|
308 @options[cast[:long]] = s
311 raise ArgumentError, "Unknown option type: '#{cast[:type]}'"
316 option_parser.parse!(@argv)
319 print_usage_and_exit(true)
320 elsif print_usage_short?
331 # Given the script name determine the path of the wiki file with the usage info.
333 path = File.join(ENV["BP_DIR"], "bp_usage", File.basename(@script_path)) + ".wiki"
334 raise "No such wiki file: #{path}" unless File.file? path
338 # Check if full "usage info" should be printed.
339 def print_usage_full?
343 # Check if short "usage info" should be printed.
344 def print_usage_short?
347 elsif @options[:stream_in]
349 elsif @options[:data_in]
351 elsif wiki_path =~ /^(list_biopieces|list_genomes|list_mysql_databases|biostat)$/ # TODO get rid of this!
358 # Print usage info by calling an external script 'print_wiki'
359 # using a system() call and exit. An optional 'full' flag
360 # outputs the full usage info.
361 def print_usage_and_exit(full=nil)
366 system("print_wiki --data_in #{wiki_path} --help")
368 system("print_wiki --data_in #{wiki_path}")
371 raise "Failed printing wiki: #{wiki_path}" unless $?.success?
377 # Set default options value from cast unless a value is set.
379 @casts.each do |cast|
381 unless @options.has_key? cast[:long]
382 if cast[:type] == 'list'
383 @options[cast[:long]] = cast[:default].split ','
385 @options[cast[:long]] = cast[:default]
392 # Expands glob expressions to a full list of paths.
393 # Examples: "*.fna" or "foo.fna,*.fna" or "foo.fna,/bar/*.fna"
395 @casts.each do |cast|
396 if cast[:type] == 'files' or cast[:type] == 'files!'
397 if @options.has_key? cast[:long]
400 @options[cast[:long]].each do |path|
402 Dir.glob(path).each do |file|
403 files << file if File.file? file
410 @options[cast[:long]] = files
416 # Check all options according to casts.
418 @casts.each do |cast|
419 options_check_mandatory(cast)
420 options_check_int(cast)
421 options_check_uint(cast)
422 options_check_file(cast)
423 options_check_files(cast)
424 options_check_dir(cast)
425 options_check_allowed(cast)
426 options_check_disallowed(cast)
430 # Check if a mandatory option is set and raise if it isn't.
431 def options_check_mandatory(cast)
433 raise ArgumentError, "Mandatory argument: --#{cast[:long]}" unless @options.has_key? cast[:long]
# Check an option whose cast type is 'int'.
# Does nothing unless the cast is of type 'int' and the option is set;
# raises ArgumentError when the stored value is not an Integer.
def options_check_int(cast)
  return unless cast[:type] == 'int' && @options.has_key?(cast[:long])

  value = @options[cast[:long]]
  return if value.is_a?(Integer)

  raise ArgumentError, "Argument to --#{cast[:long]} must be an integer, not '#{@options[cast[:long]]}'"
end
# Check an option whose cast type is 'uint'.
# Does nothing unless the cast is of type 'uint' and the option is set;
# raises ArgumentError unless the stored value is a non-negative Integer.
def options_check_uint(cast)
  return unless cast[:type] == 'uint' && @options.has_key?(cast[:long])

  value = @options[cast[:long]]
  return if value.is_a?(Integer) && value >= 0

  raise ArgumentError, "Argument to --#{cast[:long]} must be an unsigned integer, not '#{@options[cast[:long]]}'"
end
# Check an option whose cast type is 'file!'.
# Does nothing unless the cast is of type 'file!' and the option is set;
# raises ArgumentError when the stored path is not an existing regular file.
def options_check_file(cast)
  return unless cast[:type] == 'file!' && @options.has_key?(cast[:long])
  return if File.file?(@options[cast[:long]])

  raise ArgumentError, "No such file: '#{@options[cast[:long]]}'"
end
462 # Check files! type argument and raise if any of the files is not readable.
463 def options_check_files(cast)
464 if cast[:type] == 'files!' and @options.has_key? cast[:long]
465 @options[cast[:long]].each do |path|
467 raise ArgumentError, "File not readable: '#{path}'" unless File.readable? path
# Check an option whose cast type is 'dir!'.
# Does nothing unless the cast is of type 'dir!' and the option is set;
# raises ArgumentError when the stored path is not an existing directory.
def options_check_dir(cast)
  return unless cast[:type] == 'dir!' && @options.has_key?(cast[:long])
  return if File.directory?(@options[cast[:long]])

  raise ArgumentError, "No such directory: '#{@options[cast[:long]]}'"
end
479 # Check options and raise unless allowed.
480 def options_check_allowed(cast)
481 if cast[:allowed] and @options.has_key? cast[:long]
483 cast[:allowed].split(',').each { |a| allowed_hash[a.to_s] = 1 }
485 raise ArgumentError, "Argument '#{@options[cast[:long]]}' to --#{cast[:long]} not allowed" unless allowed_hash.has_key? @options[cast[:long]].to_s
# Compare a set option against the comma separated :disallowed list of
# its cast. Does nothing when the cast has no :disallowed list or the
# option is unset; raises ArgumentError on the first entry whose string
# form equals the string form of the option value.
def options_check_disallowed(cast)
  return unless cast[:disallowed] && @options.has_key?(cast[:long])

  given = @options[cast[:long]].to_s
  cast[:disallowed].split(',').each do |forbidden|
    next unless forbidden.to_s == given

    raise ArgumentError, "Argument '#{@options[cast[:long]]}' to --#{cast[:long]} is disallowed"
  end
end
499 # Class for manipulating the execution status of Biopieces by setting a
500 # status file with a time stamp, process id, and command arguments. The
501 # status file is used for creating log entries and for displaying the
502 # runtime status of Biopieces.
504 # Write the status to a status file.
506 time0 = Time.new.strftime("%Y-%m-%d %X")
508 File.open(path, mode="w") do |fh|
509 fh.puts [time0, ARGV.join(" ")].join(";")
513 # Append a temporary directory path to the status file.
514 def set_tmpdir(tmpdir_path)
517 File.open(path, mode="r") do |fh|
518 status = fh.read.chomp
521 status = "#{status};#{tmpdir_path}\n"
523 File.open(path, mode="w") do |fh|
528 # Extract the temporary directory path from the status file,
529 # and return this or nil if not found.
531 File.open(path, mode="r") do |fh|
532 tmpdir_path = fh.read.chomp.split(";").last
533 return tmpdir_path if File.directory?(tmpdir_path)
539 # Write the Biopiece status to the log file.
541 time1 = Time.new.strftime("%Y-%m-%d %X")
543 script = File.basename($0)
545 stream = File.open(path)
546 time0, args, tmp_dir = stream.first.split(";")
549 elap = time_diff(time0, time1)
550 command = [script, args].join(" ")
551 log_file = ENV["BP_LOG"] + "/biopieces.log"
553 File.open(log_file, mode = "a") { |file| file.puts [time0, time1, elap, user, exit_status, command].join("\t") }
556 # Delete status file.
563 # Path to status file
566 script = File.basename($0)
568 path = ENV["BP_TMP"] + "/" + [user, script, pid, "status"].join(".")
# Compute the elapsed time between two time stamp strings.
# Both stamps are parsed with DateTime.parse; the whole-second difference
# t1 - t0 is formatted through strftime('%X') on a GMT Time.
def time_diff(t0, t1)
  finished = DateTime.parse(t1).to_time
  started  = DateTime.parse(t0).to_time
  seconds  = (finished - started).to_i

  Time.at(seconds).gmtime.strftime('%X')
end
579 # Open Biopieces output data stream for reading from stdin or a file
580 # specified in options[:stream_in] OR writing to stdout or a file
581 # specified in options[:stream_out] or options[:data_out].
582 def self.open(options, mode, stdio)
584 if options[:data_in] and options[:data_in].first == "-"
587 $stdin.tty? ? read(options[:stream_in]) : stdio
590 options[:stream_out] ? self.write(options[:stream_out], options[:compress]) : stdio
592 raise "Bad mode #{mode}"
598 # Opens a read stream to a list of files.
600 return if files.nil? #TODO case/when
601 self.zipped?(files) ? self.zread(files) : self.nread(files)
# Open a write stream to a file and return the resulting _io_ object.
# A truthy +zip+ delegates to zwrite, otherwise nwrite is used.
def self.write(file, zip=nil)
  if zip
    self.zwrite(file)
  else
    self.nwrite(file)
  end
end
609 # Opens a list of gzipped files for reading and return an _io_ object.
610 def self.zread(files)
611 stdin, stdout, stderr = Open3.popen3("zcat " + files.join(' '));
617 # Opens a file for gzipped writing and return an _io_ object.
618 def self.zwrite(file)
619 stdin, stdout, stderr = Open3.popen3("gzip -f > #{file}")
625 # Opens a list of files for reading and return an _io_ object.
626 def self.nread(files)
627 stdin, stdout, stderr = Open3.popen3("cat " + files.join(' '));
# Open a file in "w" mode and return the resulting _io_ object.
# The caller is responsible for closing the returned object.
def self.nwrite(file)
  File.open(file, "w")
end
638 # Test if a list of files are gzipped or not.
639 # Raises if files are mixed zipped and unzipped.
640 def self.zipped?(files)
644 type = `file #{file}`
646 if type =~ /gzip compressed/
647 type_hash[:gzip] = true
649 type_hash[:ascii] = true
653 raise "Mixture of zipped and unzipped files" if type_hash.size == 2
659 # Set status when 'biopieces' is required.
662 # Clean up when 'biopieces' exits.
664 exit_status = $! ? $!.inspect : "OK"
668 exit_status = "ERROR"
670 exit_status = "INTERRUPTED"
672 exit_status = "TERMINATED"
678 tmpdir = status.get_tmpdir
679 FileUtils.remove_entry_secure(tmpdir) unless tmpdir.nil?
680 status.log(exit_status)