X-Git-Url: https://git.donarmstrong.com/?p=bin.git;a=blobdiff_plain;f=txt2xls;h=c79fb53b2d1c2ff63594602f14c280b34ac97cbc;hp=147ace9479185c0028719b8681bad706e4a1fe47;hb=4621d23526ccf936d1ad076ece0bc663510dfc52;hpb=a0f50a48f47db3337b33310770e0e3d162398708

diff --git a/txt2xls b/txt2xls
index 147ace9..c79fb53 100755
--- a/txt2xls
+++ b/txt2xls
@@ -25,6 +25,9 @@ txt2xls - Turns a (set of) text file(s) into an excel workbook
   --ssv, -s space separated value mode
   --csv, -c comma separated value mode
   --r-mode, -r R mode (Default)
+  --auto-format Auto format (Default)
+  --sci-format Scientific format formatting string (Default 0.00E+0)
+  --max-digits Maximum digits to use for auto-format (Default 4)
   --debug, -d debugging level (Default 0)
   --help, -h display this help
   --man, -m display manual
@@ -33,6 +36,27 @@ txt2xls - Turns a (set of) text file(s) into an excel workbook
 
 =over
 
+=item B<--auto-format>
+
+Attempt to automatically format the excel file. Currently, this does
+nothing for non-numeric entries. For numeric entries, if the number is
+very large (> 9999), or less than 0.001, but not 0, the cell is put
+into scientific format (B<--sci-format>, default C<0.00E+0>).
+Otherwise, the cell is formatted to use at maximum B<--max-digits>
+(default 4) digits.
+
+To disable, use B<--no-auto-format>
+
+=item B<--sci-format>
+
+Excel format string to use for scientific format. See
+L<http://office.microsoft.com/en-us/excel-help/number-format-codes-HP005198679.aspx>
+for details. (Default C<'0.00E+0'>)
+
+=item B<--max-digits>
+
+Maximum number digits to display for non-scientific formats. (Default 4)
+
 =item B<--debug, -d>
 
 Debug verbosity. (Default 0)
@@ -57,10 +81,15 @@ use vars qw($DEBUG);
 
 use Text::CSV;
 use Spreadsheet::WriteExcel;
+use Scalar::Util qw(looks_like_number);
+use POSIX qw(floor);
 
 my %options = (debug           => 0,
 	       help            => 0,
 	       man             => 0,
+               auto_format     => 1,
+               sci_format      => '0.00E+0',
+               max_digits      => 4,
 	       remove_name     => [],
 	       );
 
@@ -68,8 +97,12 @@ GetOptions(\%options,
 	   'tsv|t',
 	   'ssv|s',
 	   'csv|c',
+           'auto_format|auto-format!',
+           'sci_format|sci-format=s',
+           'max_digits|max-digits=i',
 	   'rmode|r-mode|r!',
-	   'remove_name|remove-name=s@',
+           'remove_name|remove-name=s@',
+           'output|output_file|output-file=s',
 	   'debug|d+','help|h|?','man|m');
 
 pod2usage() if $options{help};
@@ -100,8 +133,20 @@ elsif ($options{ssv}) {
     $sep_char = ' ';
 }
 
+if (not @{$options{remove_name}}) {
+    $options{remove_name} = ['.+\/',
+                            ];
+}
+
+my %wb_formats = ();
 my $csv = Text::CSV->new({sep_char=>$sep_char});
-my $wb = Spreadsheet::WriteExcel->new(\*STDOUT);
+my $wb;
+if (defined $options{output}) {
+    $wb =  Spreadsheet::WriteExcel->new($options{output});
+} else {
+    $wb = Spreadsheet::WriteExcel->new(\*STDOUT);
+}
+
 for my $file (@ARGV) {
     my $fh;
     if (not defined $file) {
@@ -109,12 +154,12 @@ for my $file (@ARGV) {
 	$file = "STDIN";
     }
     else {
-	$fh = IO::File->new($file,'r') or
+	open($fh,'<:encoding(utf8)',$file) or
 	    die "Unable to open $file for reading: $!";
     }
     my $ws_name = $file;
     foreach my $remove (@{$options{remove_name}}) {
-	$ws_name =~ s{\Q$remove\E}{}g;
+        $ws_name =~ s{$remove}{}g;
     }
     $ws_name =~ s{\.[^\.]+$}{}g;
     $ws_name =~ s/_+/ /g;
@@ -132,7 +177,7 @@ for my $file (@ARGV) {
     while (<$fh>) {
 	chomp;
 	# parse the line
-	die "Unable to parse line $. of $file" unless $csv->parse($_);
+	die "Unable to parse line $. of $file: ".$csv->error_diag() unless $csv->parse($_);
 	my @row = $csv->fields();
 	if ($row==1 and not $r_mode_autodetected) {
 	    @header_row = @row;
@@ -148,30 +193,58 @@ for my $file (@ARGV) {
 		# R doesn't output headers for rownames
 		unshift @header_row,'';
 	    }
-	    output_row(\@header_row,1,$ws);
+	    output_row(\@header_row,1,$ws,$wb,\%wb_formats,\%options);
 	}
 	if ($row > 65536) { # ok, we're going to overflow here
 	    my $t_ws_name = $ws_name;
 	    my $maxlen = 31-length('.'.$overflow);
 	    $t_ws_name =~ s{^(.{0,$maxlen}).*$}{$1};
-	    $ws = $wb->add_worksheet($ws_name.'.'.$overflow);
+	    $ws = $wb->add_worksheet($t_ws_name.'.'.$overflow);
+            $overflow++;
 	    $row=1;
-	    output_row(\@header_row,$row,$ws);
+	    output_row(\@header_row,$row,$ws,$wb,\%wb_formats,\%options);
 	    $row++;
 	}
 	if ($row==1) {
 	    @header_row = @row;
 	}
-	output_row(\@row,$row,$ws);
+	output_row(\@row,$row,$ws,$wb,\%wb_formats,\%options);
 	$row++;
     }
 }
 
 sub output_row{
-    my ($data,$row,$ws) = @_;
+    my ($data,$row,$ws,$wb,$formats,$options) = @_;
     my @columns = ('A'..'Z','AA'..'ZZ');
     for my $i (0..$#{$data}) {
-	$ws->write($columns[$i].$row,$data->[$i]);
+        my $format;
+        if ($options->{auto_format}) {
+            if (looks_like_number($data->[$i])) {
+                my $format_string;
+                # use scientific format?
+                if ($data->[$i] != 0 && (abs($data->[$i]) > 9999 ||
+                                         abs($data->[$i]) < 0.001)) {
+                    $format_string = $options->{sci_format};
+                } else {
+                    $format_string = '0';
+                    my $digits = length(floor(abs($data->[$i])));
+                    if ($options->{max_digits} - $digits > 0 and
+                        abs($data->[$i]) != abs(floor($data->[$i]))) {
+                        # if there are digits left over, use them for
+                        # decimal places, but don't require them
+                        $format_string = $format_string.'.'.('#'x($options->{max_digits} - $digits))
+                    }
+                }
+                if (not exists $formats->{$format_string}) {
+                    $formats->{$format_string} =
+                        $wb->add_format();
+                    $formats->{$format_string}->
+                        set_num_format($format_string);
+                }
+                $format = $formats->{$format_string};
+            }
+        }
+        $ws->write($columns[$i].$row,$data->[$i],defined $format?$format:())
     }
 }