From 68f0a4925eda29d8fec8a4cbf934cd01e5ff213d Mon Sep 17 00:00:00 2001 From: martinahansen Date: Tue, 15 Jun 2010 11:55:05 +0000 Subject: [PATCH] ruby code schrubbing git-svn-id: http://biopieces.googlecode.com/svn/trunk@981 74ccb610-7750-0410-82ae-013aeee3265d --- code_ruby/Maasha/lib/biopieces.rb | 319 ++++++++++++++++-------------- 1 file changed, 170 insertions(+), 149 deletions(-) diff --git a/code_ruby/Maasha/lib/biopieces.rb b/code_ruby/Maasha/lib/biopieces.rb index 5f054f1..0135ad0 100644 --- a/code_ruby/Maasha/lib/biopieces.rb +++ b/code_ruby/Maasha/lib/biopieces.rb @@ -1,3 +1,5 @@ +require 'fileutils' +require 'date' require 'optparse' require 'open3' require 'pp' @@ -18,7 +20,8 @@ class Biopieces @test = test @input = input @output = output - status_set unless @test + @status = Status.new + @status.set unless @test end # Check the integrity of a list of casts, followed by parsion options from argv @@ -33,7 +36,7 @@ class Biopieces # Open Biopiece input stream if not open and iterate over all Biopiece # records in the stream. def each_record - @in = stream_in_open unless @in.is_a? IO + @in = Stream::open(@options, mode="r", @input) unless @in.is_a? IO record = {} @@ -58,7 +61,7 @@ class Biopieces # Open Biopiece output stream if not open and puts record to the stream. def puts(record) - @out = stream_out_open unless @out.is_a? IO + @out = Stream::open(@options, mode="w", @output) unless @out.is_a? IO record.each do |key,value| @out.print "#{key}: #{value}\n" @@ -67,160 +70,33 @@ class Biopieces @out.print "---\n" end - # Close Biopiece streams, remove tmp_dir, end log status. + # Create a temporary directory inside the ENV["BP_TMP"] dir. + def mktmpdir + time = Time.now.to_i + user = ENV["USER"] + pid = $$ + path = ENV["BP_TMP"] + "/" + [user, time + pid, pid, "bp_tmp"].join("_") + Dir.mkdir(path) + @status.set_tmpdir(path) + path + end + + # Close Biopiece streams, remove tmp_dir, log status, and delete status file. def clean @in.close if @in.respond_to? :close @out.close if @out.respond_to? :close - # remove tmpdir if found - status_log - # remove status file + rmtmpdir + @status.log + @status.delete end private - # Open Biopieces input data stream for reading from either - # stdin or from a list of files specified in options["stream_in"]. - def stream_in_open - $stdin.tty? ? read(@options["stream_in"]) : @input - end - - # Open Biopieces output data stream for writing to stdout - # or a file specified in options["stream_out"]. - def stream_out_open - @options["stream_out"] ? write(@options["stream_out"], @options["compress"]) : @output - end - - # Opens a reads stream to a list of files. - def read(files) - zipped?(files) ? zread(files) : nread(files) - end + # Remove temporary directory, if existing, and it's content. + def rmtmpdir + tmp_path = @status.get_tmpdir - # Opens a write stream to a file and returns a _io_ object. - def write(file, zip=nil) - zip ? zwrite(file) : nwrite(file) - end - - # Test if a list of files are gzipped or not. - # Raises if files are mixed zipped and unzipped. - def zipped?(files) - type_hash = {} - - files.each do |file| - type = `file #{file}` - - if type =~ /gzip compressed/ - type_hash[:gzip] = true - else - type_hash[:ascii] = true - end - end - - raise "Mixture of zipped and unzipped files" if type_hash.size == 2 - - type_hash[:gzip] - end - - # Opens a list of gzipped files for reading and return an _io_ object. - def zread(files) - stdin, stdout, stderr = Open3.popen3("zcat " + files.join(' ')); - stdin.close - stderr.close - stdout - end - - # Opens a file for gzipped writing and return an _io_ object. - def zwrite(file) - stdin, stdout, stderr = Open3.popen3("gzip -f > #{file}") - stderr.close - stdout.close - stdin - end - - # Opens a list of files for reading and return an _io_ object. - def nread(files) - stdin, stdout, stderr = Open3.popen3("cat " + files.join(' ')); - stdin.close - stderr.close - stdout - end - - # Opens a file for writing and return an _io_ object. - def nwrite(file) - File.open(file, mode="w") - end - - # Write the status to a status file. - def status_set - now = Time.new - time = now.strftime("%Y-%m-%d %X") - user = ENV["USER"] - script = File.basename($0, ".rb") - pid = $$ - path = ENV["BP_TMP"] + "/" + [user, script, pid, "status"].join(".") - - File.open(path, mode="w") { |file| file.puts [time, ARGV.join(" ")].join(";") } - end - - # Write the Biopiece status to the log file. - def status_log(status="OK") - now = Time.new - time1 = now.strftime("%Y-%m-%d %X") - user = ENV["USER"] - script = File.basename($0, ".rb") - pid = $$ - path = ENV["BP_TMP"] + "/" + [user, script, pid, "status"].join(".") - - stream = File.open(path) - time0, args, tmp_dir = stream.first.split(";") - - # Dir.rmdir(tmp_dir) unless tmp_dir.nil? and File.directory? tmp_dir #TODO fix this! - - elap = time_diff(time0, time1) - command = [script, args].join(" ") - log_file = ENV["BP_LOG"] + "/biopieces.log" - - File.open(log_file, mode="a") { |file| file.puts [time0, time1, elap, user, status, command].join("\t") } - end - - # Get the elapsed time from the difference between two time stamps. - def time_diff(t0, t1) - t0 =~ /(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)/ - year0 = $1.to_i - mon0 = $2.to_i - day0 = $3.to_i - hour0 = $4.to_i - min0 = $5.to_i - sec0 = $6.to_i - - sec0 += day0 * 24 * 60 * 60 - sec0 += hour0 * 60 * 60 - sec0 += min0 * 60 - - t1 =~ /(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)/ - year1 = $1.to_i - mon1 = $2.to_i - day1 = $3.to_i - hour1 = $4.to_i - min1 = $5.to_i - sec1 = $6.to_i - - sec1 += day1 * 24 * 60 * 60 - sec1 += hour1 * 60 * 60 - sec1 += min1 * 60 - - year = year1 - year0 - mon = mon1 - mon0 - day = day1 - day0 - - sec = sec1 - sec0 - - hour = ( sec / ( 60 * 60 ) ).to_i - sec -= hour * 60 * 60 - - min = ( sec / 60 ).to_i - sec -= min * 60 - - [sprintf("%02d", hour), sprintf("%02d", min), sprintf("%02d", sec)].join(":") + FileUtils.remove_entry_secure(tmp_path) end end @@ -238,6 +114,8 @@ class Casts < Array TYPES = %w[flag string list int uint float file file! files files! dir dir! genome] MANDATORY = %w[long short type mandatory default allowed disallowed] + # Initialize cast object with an optional options cast list to which + # ubiquitous casts are added after which all casts are checked. def initialize(cast_list=[]) @cast_list = cast_list ubiquitous @@ -580,5 +458,148 @@ class OptionHandler end end +# Class for manipulating the execution status of Biopieces by setting a +# status file with a time stamp, process id, and command arguments. The +# status file is used for creating log entries and for displaying the +# runtime status of Biopieces. +class Status + # Write the status to a status file. + def set + time0 = Time.new.strftime("%Y-%m-%d %X") + + File.open(path, mode="w") { |file| file.puts [time0, ARGV.join(" ")].join(";") } + end + + # Append the a temporary directory path to the status file. + def set_tmpdir(tmp_path) + status = File.open(path, mode="r").read.chomp + + File.open(path, mode="w") do |fh| + fh << [status, tmp_path].join(";") + "\n" + end + end + + # Extract the temporary directory path from the status file, + # and return this or nil if not found. + def get_tmpdir + tmp_path = File.open(path, mode="r").read.chomp.split(";").last + + File.directory?(tmp_path) ? tmp_path : nil + end + + # Write the Biopiece status to the log file. + def log(status="OK") + time1 = Time.new.strftime("%Y-%m-%d %X") + user = ENV["USER"] + script = File.basename($0, ".rb") + + stream = File.open(path) + time0, args, tmp_dir = stream.first.split(";") + stream.close + + elap = time_diff(time0, time1) + command = [script, args].join(" ") + log_file = ENV["BP_LOG"] + "/biopieces.log" + + File.open(log_file, mode="a") { |file| file.puts [time0, time1, elap, user, status, command].join("\t") } + end + + # Delete status file. + def delete + File.delete(path) + end + + private + + # Path to status file + def path + user = ENV["USER"] + script = File.basename($0, ".rb") + pid = $$ + path = ENV["BP_TMP"] + "/" + [user, script, pid, "status"].join(".") + end + + # Get the elapsed time from the difference between two time stamps. + def time_diff(t0, t1) + Time.at((DateTime.parse(t1).to_time - DateTime.parse(t0).to_time).to_i).gmtime.strftime('%X') + end +end + + +class Stream < IO + # Open Biopieces output data stream for reading from stdin or a file + # specified in options["stream_in"] OR writing to stdout or a file + # specified in options["stream_out"]. + def self.open(options, mode, stdio) + if mode == "r" + $stdin.tty? ? read(options["stream_in"]) : stdio + elsif mode == "w" + options["stream_out"] ? self.write(options["stream_out"], options["compress"]) : stdio + else + raise "Bad mode #{mode}" + end + end + + private + + # Opens a reads stream to a list of files. + def self.read(files) + self.zipped?(files) ? self.zread(files) : self.nread(files) + end + + # Opens a write stream to a file and returns a _io_ object. + def self.write(file, zip=nil) + zip ? self.zwrite(file) : self.nwrite(file) + end + + # Opens a list of gzipped files for reading and return an _io_ object. + def self.zread(files) + stdin, stdout, stderr = Open3.popen3("zcat " + files.join(' ')); + stdin.close + stderr.close + stdout + end + + # Opens a file for gzipped writing and return an _io_ object. + def self.zwrite(file) + stdin, stdout, stderr = Open3.popen3("gzip -f > #{file}") + stderr.close + stdout.close + stdin + end + + # Opens a list of files for reading and return an _io_ object. + def self.nread(files) + stdin, stdout, stderr = Open3.popen3("cat " + files.join(' ')); + stdin.close + stderr.close + stdout + end + + # Opens a file for writing and return an _io_ object. + def self.nwrite(file) + File.open(file, mode="w") + end + + # Test if a list of files are gzipped or not. + # Raises if files are mixed zipped and unzipped. + def self.zipped?(files) + type_hash = {} + + files.each do |file| + type = `file #{file}` + + if type =~ /gzip compressed/ + type_hash[:gzip] = true + else + type_hash[:ascii] = true + end + end + + raise "Mixture of zipped and unzipped files" if type_hash.size == 2 + + type_hash[:gzip] + end +end __END__ -- 2.39.5