X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=code_ruby%2Flib%2Fmaasha%2Fbiopieces.rb;h=5fd6edf4a505154bb929976a86c58246ac5d3f85;hb=0153863ce54bf865f397a1c4b6a45b0180d71e31;hp=ca235318c815ea3021c65c74c1ad8ffd58155e55;hpb=2944deebd4724b87f68388f4178ce127aec7ce56;p=biopieces.git diff --git a/code_ruby/lib/maasha/biopieces.rb b/code_ruby/lib/maasha/biopieces.rb index ca23531..5fd6edf 100644 --- a/code_ruby/lib/maasha/biopieces.rb +++ b/code_ruby/lib/maasha/biopieces.rb @@ -1,6 +1,6 @@ raise "Ruby 1.9 or later required" if RUBY_VERSION < "1.9" -# Copyright (C) 2007-2010 Martin A. Hansen. +# Copyright (C) 2007-2011 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -24,11 +24,34 @@ raise "Ruby 1.9 or later required" if RUBY_VERSION < "1.9" # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -require 'fileutils' require 'date' +require 'fileutils' require 'optparse' -require 'open3' require 'pp' +require 'stringio' +require 'zlib' + +TEST = false + +# Monkey patch (evil) changing the to_s method of the Hash class to +# return a Hash in Biopieces format; keys and value pairs on one line +# each seperated with ': ' and terminated by a line of '---'. +class Hash + def to_s + string = "" + + self.each do |key, value| + string << "#{key.to_s}: #{value}\n" + end + + string << "---\n" + + string + end +end + +# Error class for all exceptions to do with the Biopieces class. +class BiopiecesError < StandardError; end # Biopieces are command line scripts and uses OptionParser to parse command line # options according to a list of casts. Each cast prescribes the long and short @@ -42,37 +65,67 @@ require 'pp' class Biopieces include Enumerable - attr_accessor :out # accessor for out stream _ios_ + # Class method to check the integrity of a list of casts, followed by parsing + # options from argv and finally checking the options according to the casts. + def self.options_parse(argv, cast_list=[], script_path=$0) + casts = Casts.new(cast_list) + option_handler = OptionHandler.new(argv, casts, script_path) + options = option_handler.options_parse - # Initialize a Biopiece and write the status to file. - # Options are for testing purposes only. - def initialize(test=nil, input=STDIN, output=STDOUT) - @test = test - @input = input - @output = output + options end - # Check the integrity of a list of casts, followed by parsion options from argv - # and finally checking the options according to the casts. Returns nil if argv - # is empty, otherwise an options hash. - def parse(argv, cast_list=[], script_path=$0) - casts = Casts.new(cast_list) - option_handler = OptionHandler.new(argv, casts, script_path, @test) - @options = option_handler.options_parse + # Class method for opening data streams for reading and writing Biopiece + # records. Records are read from STDIN (default) or file (possibly gzipped) + # and written to STDOUT (default) or file. + def self.open(input = STDIN, output = STDOUT) + input = self.open_input(input) + output = self.open_output(output) - @in = Stream.open(@options, mode="r", @input) + if block_given? + begin + yield input, output + ensure + input.close + output.close + end + else + return input, output + end + end - @options + # Class method to create a temporary directory inside the ENV["BP_TMP"] directory. + def self.mktmpdir + time = Time.now.to_i + user = ENV["USER"] + pid = $$ + path = File.join(ENV["BP_TMP"], [user, time + pid, pid, "bp_tmp"].join("_")) + Dir.mkdir(path) + Status.new.set_tmpdir(path) + path + end + + # Initialize a Biopiece object for either reading or writing from _ios_. + def initialize(ios, stdio = nil) + @ios = ios + @stdio = stdio + end + + # Method to write a Biopiece record to _ios_. + def puts(foo) + @ios << foo.to_s + end + + # Method to close _ios_. + def close + @ios.close unless @stdio end - # Open Biopiece input stream if not open and iterate over all Biopiece - # records in the stream. + # Method to parse and yield a Biopiece record from _ios_. def each_record - return if @in.nil? - record = {} - @in.each_line do |line| + @ios.each_line do |line| case line when /^([^:]+): (.*)$/ record[$1.to_sym] = $2 @@ -80,7 +133,7 @@ class Biopieces yield record unless record.empty? record = {} else - raise "Bad record format: #{line}" + raise BiopiecesError, "Bad record format: #{line}" end end @@ -91,29 +144,42 @@ class Biopieces alias :each :each_record - # Open Biopiece output stream if not open and puts record to the stream. - def puts(record) - @out = Stream.open(@options, mode="w", @output) unless @out.is_a? IO + private + + # Class method for opening data stream for reading Biopiece records. + # Records are read from STDIN (default) or file (possibly gzipped). + def self.open_input(input) + if input.nil? + if STDIN.tty? + input = self.new(StringIO.new) + else + input = self.new(STDIN, stdio = true) + end + elsif File.exists? input + ios = File.open(input, mode='r') + + begin + ios = Zlib::GzipReader.new(ios) + rescue + ios.rewind + end - record.each do |key,value| - @out.print "#{key.to_s}: #{value}\n" + input = self.new(ios) end - @out.print "---\n" + input end - def to_s - end + # Class method for opening data stream for writing Biopiece records. + # Records are written to STDOU (default) or file. + def self.open_output(output) + if output.nil? + output = self.new(STDOUT, stdio = true) + elsif not output.is_a? IO + output = self.new(File.open(output, mode='w')) + end - # Create a temporary directory inside the ENV["BP_TMP"] dir. - def mktmpdir - time = Time.now.to_i - user = ENV["USER"] - pid = $$ - path = ENV["BP_TMP"] + "/" + [user, time + pid, pid, "bp_tmp"].join("_") - Dir.mkdir(path) - Status.new.set_tmpdir(path) - path + output end end @@ -121,7 +187,6 @@ end # Error class for all exceptions to do with option casts. class CastError < StandardError; end - # Class to handle casts of command line options. Each cast prescribes the long and # short name of the option, the type, if it is mandatory, the default value, and # allowed and disallowed values. An optional list of extra casts can be supplied, @@ -145,7 +210,7 @@ class Casts < Array # Add ubiquitous options casts. def ubiquitous @cast_list << {:long=>'help', :short=>'?', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} - @cast_list << {:long=>'stream_in', :short=>'I', :type=>'files!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} + @cast_list << {:long=>'stream_in', :short=>'I', :type=>'file!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} @cast_list << {:long=>'stream_out', :short=>'O', :type=>'file', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} @cast_list << {:long=>'verbose', :short=>'v', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} end @@ -253,7 +318,6 @@ class Casts < Array cast[:long] = cast[:long].to_sym end end - end @@ -266,11 +330,10 @@ class OptionHandler REGEX_INT = /^(int|uint)$/ REGEX_STRING = /^(file|file!|dir|dir!|genome)$/ - def initialize(argv, casts, script_path, test=nil) + def initialize(argv, casts, script_path) @argv = argv @casts = casts @script_path = script_path - @test = test end # Parse options from argv using OptionParser and casts denoting long and @@ -315,7 +378,7 @@ class OptionHandler option_parser.parse!(@argv) if print_usage_full? - print_usage_and_exit(true) + print_usage_and_exit(full=true) elsif print_usage_short? print_usage_and_exit end @@ -341,7 +404,7 @@ class OptionHandler # Check if short "usage info" should be printed. def print_usage_short? - if not $stdin.tty? + if not STDIN.tty? return false elsif @options[:stream_in] return false @@ -358,7 +421,7 @@ class OptionHandler # using a system() call and exit. An optional 'full' flag # outputs the full usage info. def print_usage_and_exit(full=nil) - if @test + if TEST return else if full @@ -547,7 +610,7 @@ class Status elap = time_diff(time0, time1) command = [script, args].join(" ") - log_file = ENV["BP_LOG"] + "/biopieces.log" + log_file = File.join(ENV["BP_LOG"], "biopieces.log") File.open(log_file, mode = "a") { |file| file.puts [time0, time1, elap, user, exit_status, command].join("\t") } end @@ -564,7 +627,7 @@ class Status user = ENV["USER"] script = File.basename($0) pid = $$ - path = ENV["BP_TMP"] + "/" + [user, script, pid, "status"].join(".") + path = File.join(ENV["BP_TMP"], [user, script, pid, "status"].join(".")) end # Get the elapsed time from the difference between two time stamps. @@ -574,87 +637,6 @@ class Status end -class Stream < IO - # Open Biopieces output data stream for reading from stdin or a file - # specified in options[:stream_in] OR writing to stdout or a file - # specified in options[:stream_out] or options[:data_out]. - def self.open(options, mode, stdio) - if mode == "r" - if options[:data_in] and options[:data_in].first == "-" - self.nread(["-"]) - else - $stdin.tty? ? read(options[:stream_in]) : stdio - end - elsif mode == "w" - options[:stream_out] ? self.write(options[:stream_out], options[:compress]) : stdio - else - raise "Bad mode #{mode}" - end - end - - private - - # Opens a reads stream to a list of files. - def self.read(files) - return if files.nil? #TODO case/when - self.zipped?(files) ? self.zread(files) : self.nread(files) - end - - # Opens a write stream to a file and returns a _io_ object. - def self.write(file, zip=nil) - zip ? self.zwrite(file) : self.nwrite(file) - end - - # Opens a list of gzipped files for reading and return an _io_ object. - def self.zread(files) - stdin, stdout, stderr = Open3.popen3("zcat " + files.join(' ')); - stdin.close - stderr.close - stdout - end - - # Opens a file for gzipped writing and return an _io_ object. - def self.zwrite(file) - stdin, stdout, stderr = Open3.popen3("gzip -f > #{file}") - stderr.close - stdout.close - stdin - end - - # Opens a list of files for reading and return an _io_ object. - def self.nread(files) - stdin, stdout, stderr = Open3.popen3("cat " + files.join(' ')); - stdin.close - stderr.close - stdout - end - - # Opens a file for writing and return an _io_ object. - def self.nwrite(file) - File.open(file, mode="w") - end - - # Test if a list of files are gzipped or not. - # Raises if files are mixed zipped and unzipped. - def self.zipped?(files) - type_hash = {} - - files.each do |file| - type = `file #{file}` - - if type =~ /gzip compressed/ - type_hash[:gzip] = true - else - type_hash[:ascii] = true - end - end - - raise "Mixture of zipped and unzipped files" if type_hash.size == 2 - - type_hash[:gzip] - end -end - # Set status when 'biopieces' is required. Status.new.set @@ -682,3 +664,4 @@ end __END__ +