--ssv, -s space separated value mode
--csv, -c comma separated value mode
--r-mode, -r R mode (Default)
+ --auto-format Auto format (Default)
+ --sci-format Scientific format formatting string (Default 0.00E+0)
+ --max-digits Maximum digits to use for auto-format (Default 4)
--debug, -d debugging level (Default 0)
--help, -h display this help
--man, -m display manual
=over
+=item B<--auto-format>
+
+Attempt to automatically format the excel file. Currently, this does
+nothing for non-numeric entries. For numeric entries, if the number is
+very large (> 9999), or less than 0.001, but not 0, the cell is put
+into scientific format (B<--sci-format>, default C<0.00E+0>).
+Otherwise, the cell is formatted to use at maximum B<--max-digits>
+(default 4) digits.
+
+To disable, use B<--no-auto-format>
+
+=item B<--sci-format>
+
+Excel format string to use for scientific format. See
+L<http://office.microsoft.com/en-us/excel-help/number-format-codes-HP005198679.aspx>
+for details. (Default C<'0.00E+0'>)
+
+=item B<--max-digits>
+
+Maximum number digits to display for non-scientific formats. (Default 4)
+
=item B<--debug, -d>
Debug verbosity. (Default 0)
use Text::CSV;
use Spreadsheet::WriteExcel;
+use Scalar::Util qw(looks_like_number);
+use POSIX qw(floor);
my %options = (debug => 0,
help => 0,
man => 0,
- rmode => 1,
+ auto_format => 1,
+ sci_format => '0.00E+0',
+ max_digits => 4,
remove_name => [],
);
'tsv|t',
'ssv|s',
'csv|c',
+ 'auto_format|auto-format!',
+ 'sci_format|sci-format=s',
+ 'max_digits|max-digits=i',
'rmode|r-mode|r!',
- 'remove_name|remove-name=s@',
+ 'remove_name|remove-name=s@',
+ 'output|output_file|output-file=s',
'debug|d+','help|h|?','man|m');
pod2usage() if $options{help};
pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
-my @columns = ('A'..'Z','AA'..'ZZ');
-
if (not @ARGV) {
# we'll use this as a special indicator to read stdin
push @ARGV,undef;
$sep_char = ' ';
}
+if (not @{$options{remove_name}}) {
+ $options{remove_name} = ['.+\/',
+ ];
+}
+
+my %wb_formats = ();
my $csv = Text::CSV->new({sep_char=>$sep_char});
-my $wb = Spreadsheet::WriteExcel->new(\*STDOUT);
+my $wb;
+if (defined $options{output}) {
+ $wb = Spreadsheet::WriteExcel->new($options{output});
+} else {
+ $wb = Spreadsheet::WriteExcel->new(\*STDOUT);
+}
+
for my $file (@ARGV) {
my $fh;
if (not defined $file) {
$file = "STDIN";
}
else {
- $fh = IO::File->new($file,'r') or
+ open($fh,'<:encoding(utf8)',$file) or
die "Unable to open $file for reading: $!";
}
my $ws_name = $file;
foreach my $remove (@{$options{remove_name}}) {
- $ws_name =~ s{\Q$remove\E}{}g;
+ $ws_name =~ s{$remove}{}g;
}
$ws_name =~ s{\.[^\.]+$}{}g;
$ws_name =~ s/_+/ /g;
my $row = 1;
my @header_row;
my $overflow = 0;
+ my $r_mode = $options{r_mode} // 0;
+ # set to 1 if we have attempted to autodetect R mode
+ my $r_mode_autodetected = 0;
while (<$fh>) {
chomp;
# parse the line
- my @row;
- die "Unable to parse line $. of $file" unless $csv->parse($_);
- if ($row==1 and $options{rmode}) {
- # R doesn't output headers for rownames
- push @row,'';
+ die "Unable to parse line $. of $file: ".$csv->error_diag() unless $csv->parse($_);
+ my @row = $csv->fields();
+ if ($row==1 and not $r_mode_autodetected) {
+ @header_row = @row;
+ $row++;
+ next;
+ }
+ if ($row==2 and not $r_mode_autodetected) {
+ $r_mode_autodetected = 1;
+ if (@row == (@header_row+1)) {
+ $r_mode = 1 unless exists $options{r_mode} and defined $options{r_mode};
+ }
+ if ($r_mode) {
+ # R doesn't output headers for rownames
+ unshift @header_row,'';
+ }
+ output_row(\@header_row,1,$ws,$wb,\%wb_formats,\%options);
}
if ($row > 65536) { # ok, we're going to overflow here
my $t_ws_name = $ws_name;
my $maxlen = 31-length('.'.$overflow);
$t_ws_name =~ s{^(.{0,$maxlen}).*$}{$1};
- $ws = $wb->add_worksheet($ws_name.'.'.$overflow);
+ $ws = $wb->add_worksheet($t_ws_name.'.'.$overflow);
+ $overflow++;
$row=1;
- for my $i (0..@header_row) {
- $ws->write($columns[$i].$row,$header_row[$i]);
- }
+ output_row(\@header_row,$row,$ws,$wb,\%wb_formats,\%options);
$row++;
}
- push @row,$csv->fields();
if ($row==1) {
@header_row = @row;
}
- for my $i (0..$#row) {
- $ws->write($columns[$i].$row,$row[$i]);
- }
+ output_row(\@row,$row,$ws,$wb,\%wb_formats,\%options);
$row++;
}
}
+sub output_row{
+ my ($data,$row,$ws,$wb,$formats,$options) = @_;
+ my @columns = ('A'..'Z','AA'..'ZZ');
+ for my $i (0..$#{$data}) {
+ my $format;
+ if ($options->{auto_format}) {
+ if (looks_like_number($data->[$i])) {
+ my $format_string;
+ # use scientific format?
+ if ($data->[$i] != 0 && (abs($data->[$i]) > 9999 ||
+ abs($data->[$i]) < 0.001)) {
+ $format_string = $options->{sci_format};
+ } else {
+ $format_string = '0';
+ my $digits = length(floor(abs($data->[$i])));
+ if ($options->{max_digits} - $digits > 0 and
+ abs($data->[$i]) != abs(floor($data->[$i]))) {
+ # if there are digits left over, use them for
+ # decimal places, but don't require them
+ $format_string = $format_string.'.'.('#'x($options->{max_digits} - $digits))
+ }
+ }
+ if (not exists $formats->{$format_string}) {
+ $formats->{$format_string} =
+ $wb->add_format();
+ $formats->{$format_string}->
+ set_num_format($format_string);
+ }
+ $format = $formats->{$format_string};
+ }
+ }
+ $ws->write($columns[$i].$row,$data->[$i],defined $format?$format:())
+ }
+}
+
__END__