X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Fwrite_tab;h=c85293928f4a9a4782aeff5fb4814473660f0b8a;hb=041e3704a12bb721c4dd3ee3f28bf286c5654e33;hp=e20794db2189e5d58ed4a816345e3aadbeee7ce6;hpb=b607bb9d6e4adcebd9f9e128aacc91a212010ee0;p=biopieces.git diff --git a/bp_bin/write_tab b/bp_bin/write_tab index e20794d..c852939 100755 --- a/bp_bin/write_tab +++ b/bp_bin/write_tab @@ -1,6 +1,6 @@ -#!/usr/bin/env perl -w +#!/usr/bin/env ruby -# Copyright (C) 2007-2009 Martin A. Hansen. +# Copyright (C) 2007-2013 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -18,131 +18,123 @@ # http://www.gnu.org/copyleft/gpl.html - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -# Write tabular output from the stream. - # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +# This program is part of the Biopieces framework (www.biopieces.org). -use strict; -use Maasha::Fasta; -use Maasha::Biopieces; +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +# Write tabular output from the stream. 
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

# Adds thousands-separator formatting to every numeric type.
class Numeric
  # Returns the number as a String with a comma inserted between each group
  # of three digits of the integer part (1234567 => "1,234,567").
  # Digits after a decimal point are not grouped, because \G only continues
  # from where the previous match ended.
  def commify
    self.to_s.gsub(/(^[-+]?\d+?(?=(?>(?:\d{3})+)(?!\d))|\G\d{3}(?=\d))/, '\1,')
  end
end

require 'terminal-table'
require 'maasha/biopieces'
require 'maasha/filesys'

casts = []
casts << {long: 'no_stream', short: 'x', type: 'flag',   mandatory: false, default: nil,  allowed: nil,               disallowed: nil}
casts << {long: 'pretty',    short: 'p', type: 'flag',   mandatory: false, default: nil,  allowed: nil,               disallowed: nil}
casts << {long: 'comment',   short: 'c', type: 'flag',   mandatory: false, default: nil,  allowed: nil,               disallowed: nil}
casts << {long: 'commify',   short: 'C', type: 'flag',   mandatory: false, default: nil,  allowed: nil,               disallowed: nil}
casts << {long: 'delimit',   short: 'd', type: 'string', mandatory: false, default: "\t", allowed: nil,               disallowed: nil}
casts << {long: 'keys',      short: 'k', type: 'list',   mandatory: false, default: nil,  allowed: nil,               disallowed: nil}
casts << {long: 'no_keys',   short: 'K', type: 'list',   mandatory: false, default: nil,  allowed: nil,               disallowed: nil}
casts << {long: 'data_out',  short: 'o', type: 'file',   mandatory: false, default: nil,  allowed: nil,               disallowed: nil}
casts << {long: 'compress',  short: 'Z', type: 'string', mandatory: false, default: nil,  allowed: "gzip,bzip,bzip2", disallowed: nil}

options = Biopieces.options_parse(ARGV, casts)

compress = options[:compress] ? options[:compress].to_sym : nil

raise "--data_out is mandatory for compressed output" if compress && !options[:data_out]

rows     = []      # Rows buffered for --pretty; the table is rendered after the stream ends.
headings = nil     # Column keys, decided once from the first record.
comment  = options[:comment] ? true : false   # True until the "#"-prefixed header line is written.
no_keys  = options[:no_keys].each_with_object({}) { |i, h| h[i.to_sym] = true } if options[:no_keys]

# Decides the column order for a record.
#
# When every key has the form V<digits> (V0, V1, ... V10) the keys are
# returned sorted by their numeric suffix; otherwise the record's own key
# order is returned unchanged.
def columns_order(record)
  keys = record.keys

  return keys unless keys.all? { |key| key.to_s =~ /\AV\d+\z/ }

  # Compare on the whole numeric suffix so that V2 sorts before V10 and
  # V10 before V100.
  keys.sort_by { |key| key.to_s[1..-1].to_i }
end

# Parses a cell as a base-10 Integer or, failing that, a Float.
#
# Returns the parsed number, or nil when the cell is not a decimal number.
# Parsing is forced to base 10 and hex-prefixed text is rejected so that
# values such as "0x1A", "0b11" or "08" are never reinterpreted in another
# radix and then printed as a different number.
def decimal_number(cell)
  text = cell.to_s

  return nil if text =~ /\A\s*[-+]?0x/i   # Float() would otherwise accept hex strings.

  begin
    Integer(text, 10)
  rescue ArgumentError
    begin
      Float(text)
    rescue ArgumentError
      nil
    end
  end
end

# Returns true when Float() accepts the cell; used to pick right-aligned columns.
def numeric?(cell)
  Float(cell)
  true
rescue ArgumentError, TypeError
  false
end

tab_out = options[:data_out] ? Filesys.open(options[:data_out], 'w', compress: compress) : STDOUT

Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
  input.each do |record|
    unless headings
      headings = options[:keys] ? options[:keys].map { |k| k.to_sym } : columns_order(record)
      headings.reject! { |r| no_keys[r] } if options[:no_keys]
    end

    row = record.values_at(*headings)

    if options[:pretty]
      rows << row
    else
      if comment
        tab_out.puts "#" + headings.join(options[:delimit]) unless headings.empty?
        comment = false
      end

      tab_out.puts row.join(options[:delimit]) unless row.empty?
    end

    output.puts record unless options[:no_stream]
  end
end

# Nothing to render when the stream was empty: headings is still nil and
# there is no first row to inspect.
if options[:pretty] && !rows.empty?
  table = Terminal::Table.new
  table.headings = headings if options[:comment]

  # Keep the raw first row: once commified, "1,234" no longer parses as a
  # number, and the alignment check below needs the original values.
  first_row = rows.first.dup

  if options[:commify]
    rows.each do |row|
      row.each_with_index do |cell, i|
        number = decimal_number(cell)
        row[i] = number.commify if number
      end
    end
  end

  table.rows = rows

  first_row.each_with_index do |cell, i|
    table.align_column(i, :right) if numeric?(cell)
  end

  tab_out.puts table
end

tab_out.close

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<