X-Git-Url: https://git.donarmstrong.com/?p=bin.git;a=blobdiff_plain;f=txt2xls;h=c79fb53b2d1c2ff63594602f14c280b34ac97cbc;hp=147ace9479185c0028719b8681bad706e4a1fe47;hb=4621d23526ccf936d1ad076ece0bc663510dfc52;hpb=a0f50a48f47db3337b33310770e0e3d162398708 diff --git a/txt2xls b/txt2xls index 147ace9..c79fb53 100755 --- a/txt2xls +++ b/txt2xls @@ -25,6 +25,9 @@ txt2xls - Turns a (set of) text file(s) into an excel workbook --ssv, -s space separated value mode --csv, -c comma separated value mode --r-mode, -r R mode (Default) + --auto-format Auto format (Default) + --sci-format Scientific format formatting string (Default 0.00E+0) + --max-digits Maximum digits to use for auto-format (Default 4) --debug, -d debugging level (Default 0) --help, -h display this help --man, -m display manual @@ -33,6 +36,27 @@ txt2xls - Turns a (set of) text file(s) into an excel workbook =over +=item B<--auto-format> + +Attempt to automatically format the excel file. Currently, this does +nothing for non-numeric entries. For numeric entries, if the number is +very large (> 9999), or less than 0.001, but not 0, the cell is put +into scientific format (B<--sci-format>, default C<0.00E+0>). +Otherwise, the cell is formatted to use at maximum B<--max-digits> +(default 4) digits. + +To disable, use B<--no-auto-format> + +=item B<--sci-format> + +Excel format string to use for scientific format. See +L +for details. (Default C<'0.00E+0'>) + +=item B<--max-digits> + +Maximum number digits to display for non-scientific formats. (Default 4) + =item B<--debug, -d> Debug verbosity. (Default 0) @@ -57,10 +81,15 @@ use vars qw($DEBUG); use Text::CSV; use Spreadsheet::WriteExcel; +use Scalar::Util qw(looks_like_number); +use POSIX qw(floor); my %options = (debug => 0, help => 0, man => 0, + auto_format => 1, + sci_format => '0.00E+0', + max_digits => 4, remove_name => [], ); @@ -68,8 +97,12 @@ GetOptions(\%options, 'tsv|t', 'ssv|s', 'csv|c', + 'auto_format|auto-format!', + 'sci_format|sci-format=s', + 'max_digits|max-digits=i', 'rmode|r-mode|r!', - 'remove_name|remove-name=s@', + 'remove_name|remove-name=s@', + 'output|output_file|output-file=s', 'debug|d+','help|h|?','man|m'); pod2usage() if $options{help}; @@ -100,8 +133,20 @@ elsif ($options{ssv}) { $sep_char = ' '; } +if (not @{$options{remove_name}}) { + $options{remove_name} = ['.+\/', + ]; +} + +my %wb_formats = (); my $csv = Text::CSV->new({sep_char=>$sep_char}); -my $wb = Spreadsheet::WriteExcel->new(\*STDOUT); +my $wb; +if (defined $options{output}) { + $wb = Spreadsheet::WriteExcel->new($options{output}); +} else { + $wb = Spreadsheet::WriteExcel->new(\*STDOUT); +} + for my $file (@ARGV) { my $fh; if (not defined $file) { @@ -109,12 +154,12 @@ for my $file (@ARGV) { $file = "STDIN"; } else { - $fh = IO::File->new($file,'r') or + open($fh,'<:encoding(utf8)',$file) or die "Unable to open $file for reading: $!"; } my $ws_name = $file; foreach my $remove (@{$options{remove_name}}) { - $ws_name =~ s{\Q$remove\E}{}g; + $ws_name =~ s{$remove}{}g; } $ws_name =~ s{\.[^\.]+$}{}g; $ws_name =~ s/_+/ /g; @@ -132,7 +177,7 @@ for my $file (@ARGV) { while (<$fh>) { chomp; # parse the line - die "Unable to parse line $. of $file" unless $csv->parse($_); + die "Unable to parse line $. of $file: ".$csv->error_diag() unless $csv->parse($_); my @row = $csv->fields(); if ($row==1 and not $r_mode_autodetected) { @header_row = @row; @@ -148,30 +193,58 @@ for my $file (@ARGV) { # R doesn't output headers for rownames unshift @header_row,''; } - output_row(\@header_row,1,$ws); + output_row(\@header_row,1,$ws,$wb,\%wb_formats,\%options); } if ($row > 65536) { # ok, we're going to overflow here my $t_ws_name = $ws_name; my $maxlen = 31-length('.'.$overflow); $t_ws_name =~ s{^(.{0,$maxlen}).*$}{$1}; - $ws = $wb->add_worksheet($ws_name.'.'.$overflow); + $ws = $wb->add_worksheet($t_ws_name.'.'.$overflow); + $overflow++; $row=1; - output_row(\@header_row,$row,$ws); + output_row(\@header_row,$row,$ws,$wb,\%wb_formats,\%options); $row++; } if ($row==1) { @header_row = @row; } - output_row(\@row,$row,$ws); + output_row(\@row,$row,$ws,$wb,\%wb_formats,\%options); $row++; } } sub output_row{ - my ($data,$row,$ws) = @_; + my ($data,$row,$ws,$wb,$formats,$options) = @_; my @columns = ('A'..'Z','AA'..'ZZ'); for my $i (0..$#{$data}) { - $ws->write($columns[$i].$row,$data->[$i]); + my $format; + if ($options->{auto_format}) { + if (looks_like_number($data->[$i])) { + my $format_string; + # use scientific format? + if ($data->[$i] != 0 && (abs($data->[$i]) > 9999 || + abs($data->[$i]) < 0.001)) { + $format_string = $options->{sci_format}; + } else { + $format_string = '0'; + my $digits = length(floor(abs($data->[$i]))); + if ($options->{max_digits} - $digits > 0 and + abs($data->[$i]) != abs(floor($data->[$i]))) { + # if there are digits left over, use them for + # decimal places, but don't require them + $format_string = $format_string.'.'.('#'x($options->{max_digits} - $digits)) + } + } + if (not exists $formats->{$format_string}) { + $formats->{$format_string} = + $wb->add_format(); + $formats->{$format_string}-> + set_num_format($format_string); + } + $format = $formats->{$format_string}; + } + } + $ws->write($columns[$i].$row,$data->[$i],defined $format?$format:()) } }