From: martinahansen Date: Tue, 15 Oct 2013 17:45:28 +0000 (+0000) Subject: rewrote write_tab X-Git-Url: https://git.donarmstrong.com/?p=biopieces.git;a=commitdiff_plain;h=041e3704a12bb721c4dd3ee3f28bf286c5654e33 rewrote write_tab git-svn-id: http://biopieces.googlecode.com/svn/trunk@2231 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_bin/write_tab b/bp_bin/write_tab index b512d94..c852939 100755 --- a/bp_bin/write_tab +++ b/bp_bin/write_tab @@ -1,6 +1,6 @@ -#!/usr/bin/env perl +#!/usr/bin/env ruby -# Copyright (C) 2007-2009 Martin A. Hansen. +# Copyright (C) 2007-2013 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -18,129 +18,123 @@ # http://www.gnu.org/copyleft/gpl.html - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -# Write tabular output from the stream. - # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +# This program is part of the Biopieces framework (www.biopieces.org). -use warnings; -use strict; -use Maasha::Fasta; -use Maasha::Biopieces; +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +# Write tabular output from the stream. # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -my ( $options, $in, $out, $record, $data_out, @vals, $ok, $key, @keys, $A, $B, %no_keys, $sort_keys, $check_sort ); - -$options = Maasha::Biopieces::parse_options( - [ - { long => 'no_stream', short => 'x', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'data_out', short => 'o', type => 'file', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'comment', short => 'c', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'delimit', short => 'd', type => 'string', mandatory => 'no', default => "\t", allowed => undef, disallowed => undef }, - { long => 'keys', short => 'k', type => 'list', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'no_keys', short => 'K', type => 'list', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'compress', short => 'Z', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - ] -); - -$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); -$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); -$data_out = Maasha::Biopieces::write_stream( $options->{ "data_out" }, $options->{ "compress" } ); - -map { $no_keys{ $_ } = 1 } @{ $options->{ "no_keys" } }; - -while ( $record = Maasha::Biopieces::get_record( $in ) ) -{ - undef @vals; - $ok = 1; - - if ( $options->{ "keys" } ) - { - map { $ok = 0 if not exists $record->{ $_ } } @{ $options->{ "keys" } }; - - if ( $ok ) - { - foreach $key ( @{ $options->{ "keys" } } ) - { - if ( exists $record->{ $key } ) - { - push @keys, $key if $options->{ "comment" }; - push @vals, $record->{ $key }; - } - } - } - } +class Numeric + def commify + self.to_s.gsub(/(^[-+]?\d+?(?=(?>(?:\d{3})+)(?!\d))|\G\d{3}(?=\d))/, '\1,') + end +end + +require 'terminal-table' +require 'maasha/biopieces' +require 'maasha/filesys' + +casts = [] +casts << {long: 'no_stream', short: 'x', type: 'flag', mandatory: false, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'pretty', short: 'p', type: 'flag', mandatory: false, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'comment', short: 'c', type: 'flag', mandatory: false, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'commify', short: 'C', type: 'flag', mandatory: false, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'delimit', short: 'd', type: 'string', mandatory: false, default: "\t", allowed: nil, disallowed: nil} +casts << {long: 'keys', short: 'k', type: 'list', mandatory: false, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'no_keys', short: 'K', type: 'list', mandatory: false, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'data_out', short: 'o', type: 'file', mandatory: false, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'compress', short: 'Z', type: 'string', mandatory: false, default: nil, allowed: "gzip,bzip,bzip2", disallowed: nil} + +options = Biopieces.options_parse(ARGV, casts) + +compress = options[:compress] ? options[:compress].to_sym : nil + +raise "--data_out is mandatory for compressed output" if compress and not options[:data_out] + +rows = [] +headings = nil +comment = true if options[:comment] +no_keys = options[:no_keys].each_with_object({}) { |i, h| h[i.to_sym] = true } if options[:no_keys] + +def columns_order(record) + record.keys.each do |key| + unless key.match(/^V\d+$/) + return record.keys + end + end + + record.keys.sort { |a, b| a.to_s[1 .. a.to_s.size].to_i <=> b.to_s[1 .. a.to_s.size].to_i } +end + +tab_out = options[:data_out] ? Filesys.open(options[:data_out], 'w', compress: compress) : STDOUT + +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each do |record| + unless headings + if options[:keys] + headings = options[:keys].map { |k| k.to_sym } + else + headings = columns_order(record) + end + + headings.reject! {|r| no_keys[r] } if options[:no_keys] + end + + row = record.values_at(*headings) + + if options[:pretty] + rows << row else - { - if ( not $check_sort ) - { - $sort_keys = 1; - - map { $sort_keys = 0 if $_ !~ /^V(\d+)$/ } keys %{ $record }; - - $check_sort = 1; - } - - if ( $sort_keys ) - { - foreach $key ( sort { $A = $a; $B = $b; $A =~ s/^V(\d+)$/$1/; $B =~ s/^V(\d+)$/$1/; $A <=> $B } keys %{ $record } ) - { - next if exists $no_keys{ $key }; - - push @keys, $key if $options->{ "comment" }; - push @vals, $record->{ $key }; - } - } - else - { - foreach $key ( keys %{ $record } ) - { - next if exists $no_keys{ $key }; - - push @keys, $key if $options->{ "comment" }; - push @vals, $record->{ $key }; - } - - } - } - - if ( @keys and $options->{ "comment" } ) - { - print $data_out "#", join( $options->{ "delimit" }, @keys ), "\n"; - - delete $options->{ "comment" }; - } - - print $data_out join( $options->{ "delimit" }, @vals ), "\n" if @vals; - - Maasha::Biopieces::put_record( $record, $out ) if not $options->{ "no_stream" }; -} - -close $data_out; - -Maasha::Biopieces::close_stream( $in ); -Maasha::Biopieces::close_stream( $out ); - - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - - -BEGIN -{ - Maasha::Biopieces::status_set(); -} - - -END -{ - Maasha::Biopieces::status_log(); -} + if comment + tab_out.puts "#" + headings.join(options[:delimit]) unless headings.empty? + comment = false + end + + tab_out.puts row.join(options[:delimit]) unless row.empty? + end + + output.puts record unless options[:no_stream] + end +end + +if options[:pretty] + table = Terminal::Table.new + table.headings = headings if options[:comment] + + first_row = rows.first.dup + + if options[:commify] + rows.each do |row| + row.each_with_index do |cell, i| + begin Integer(cell) + row[i] = cell.to_i.commify + rescue + begin Float(cell) + row[i] = cell.to_f.commify + rescue + end + end + end + end + end + + table.rows = rows + + first_row.each_with_index do |cell, i| + begin Float(cell) + table.align_column(i, :right) + rescue + end + end + + tab_out.puts table +end + +tab_out.close # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< diff --git a/bp_test/out/write_tab.out.7 b/bp_test/out/write_tab.out.7 new file mode 100644 index 0000000..9895e0f --- /dev/null +++ b/bp_test/out/write_tab.out.7 @@ -0,0 +1,6 @@ ++-------+-------+-----------+ +| Human | 23524 | ATACGTCAG | +| Dog | 2442 | AGCATGAC | +| Mouse | 234 | GACTG | +| Cat | 2342 | AAATGCA | ++-------+-------+-----------+ diff --git a/bp_test/out/write_tab.out.8 b/bp_test/out/write_tab.out.8 new file mode 100644 index 0000000..b1b182d --- /dev/null +++ b/bp_test/out/write_tab.out.8 @@ -0,0 +1,6 @@ ++-------+--------+-----------+ +| Human | 23,524 | ATACGTCAG | +| Dog | 2,442 | AGCATGAC | +| Mouse | 234 | GACTG | +| Cat | 2,342 | AAATGCA | ++-------+--------+-----------+ diff --git a/bp_test/out/write_tab.out.9 b/bp_test/out/write_tab.out.9 new file mode 100644 index 0000000..0c4740b --- /dev/null +++ b/bp_test/out/write_tab.out.9 @@ -0,0 +1,8 @@ ++----------+--------+-----------+ +| Organism | Count | Sequence | ++----------+--------+-----------+ +| Human | 23,524 | ATACGTCAG | +| Dog | 2,442 | AGCATGAC | +| Mouse | 234 | GACTG | +| Cat | 2,342 | AAATGCA | ++----------+--------+-----------+ diff --git a/bp_test/test/test_write_tab b/bp_test/test/test_write_tab index 7209fd7..6670702 100755 --- a/bp_test/test/test_write_tab +++ b/bp_test/test/test_write_tab @@ -14,11 +14,16 @@ run "$bp -I $in -d ',' -o $tmp -x" assert_no_diff $tmp $out.3 clean -run "$bp -I $in -Z -o $tmp.gz -x" +run "$bp -I $in -Z gzip -o $tmp.gz -x" gunzip $tmp.gz assert_no_diff $tmp $out.4 clean +run "$bp -I $in -Z bzip2 -o $tmp.bz2 -x" +bunzip2 $tmp.bz2 +assert_no_diff $tmp $out.4 +clean + run "$bp -I $in -k 'Count' -o $tmp -x" assert_no_diff $tmp $out.5 clean @@ -26,3 +31,15 @@ clean run "$bp -I $in -K 'Count' -o $tmp -x" assert_no_diff $tmp $out.6 clean + +run "$bp -I $in -p -o $tmp -x" +assert_no_diff $tmp $out.7 +clean + +run "$bp -I $in -p -C -o $tmp -x" +assert_no_diff $tmp $out.8 +clean + +run "$bp -I $in -p -c -C -o $tmp -x" +assert_no_diff $tmp $out.9 +clean diff --git a/bp_test/test_all b/bp_test/test_all index 4a1aa13..e94395f 100755 --- a/bp_test/test_all +++ b/bp_test/test_all @@ -16,6 +16,7 @@ test_ruby test_ruby_gem "gnuplot" test_ruby_gem "narray" test_ruby_gem "RubyInline" +test_ruby_gem "terminal-table" test_aux_program "blastall" test_aux_program "blat" test_aux_program "bwa"