2 # txt2xls turns text files into excel workbooks, and is released
3 # under the terms of the GPL version 2, or any later version, at your
4 # option. See the file README and COPYING for more information.
5 # Copyright 2008 by Don Armstrong <don@donarmstrong.com>.
6 # $Id: perl_script 1153 2008-04-08 00:04:20Z don $
17 txt2xls - Turns a (set of) text file(s) into an excel workbook
24 --tsv, -t tab separated value mode (Default)
25 --ssv, -s space separated value mode
26 --csv, -c comma separated value mode
27 --r-mode, -r R mode (Default)
28 --auto-format Auto format (Default)
29 --sci-format Scientific format formatting string (Default 0.00E+0)
30 --max-digits Maximum digits to use for auto-format (Default 4)
31 --debug, -d debugging level (Default 0)
32 --help, -h display this help
33 --man, -m display manual
39 =item B<--auto-format>
41 Attempt to automatically format the excel file. Currently, this does
42 nothing for non-numeric entries. For numeric entries, if the number is
43 very large (> 9999), or less than 0.001, but not 0, the cell is put
44 into scientific format (B<--sci-format>, default C<0.00E+0>).
45 Otherwise, the cell is formatted to use at maximum B<--max-digits>
48 To disable, use B<--no-auto-format>
52 Excel format string to use for scientific format. See
53 L<http://office.microsoft.com/en-us/excel-help/number-format-codes-HP005198679.aspx>
54 for details. (Default C<'0.00E+0'>)
58 Maximum number digits to display for non-scientific formats. (Default 4)
62 Debug verbosity. (Default 0)
66 Display brief useage information.
83 use Spreadsheet::WriteExcel;
84 use Scalar::Util qw(looks_like_number);
87 my %options = (debug => 0,
91 sci_format => '0.00E+0',
100 'auto_format|auto-format!',
101 'sci_format|sci-format=s',
102 'max_digits|max-digits=i',
104 'remove_name|remove-name=s@',
105 'debug|d+','help|h|?','man|m');
107 pod2usage() if $options{help};
108 pod2usage({verbose=>2}) if $options{man};
110 $DEBUG = $options{debug};
113 if (0 == grep {exists $options{$_}} qw(tsv ssv csv)) {
116 if (1 < grep {exists $options{$_}} qw(tsv ssv csv)) {
117 push @USAGE_ERRORS,"You can only pass one of --tsv, --ssv, or --csv";
120 pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
123 # we'll use this as a special indicator to read stdin
131 elsif ($options{ssv}) {
135 if (not @{$options{remove_name}}) {
136 $options{remove_name} = ['.+\/',
141 my $csv = Text::CSV->new({sep_char=>$sep_char});
142 my $wb = Spreadsheet::WriteExcel->new(\*STDOUT);
143 for my $file (@ARGV) {
145 if (not defined $file) {
150 $fh = IO::File->new($file,'r') or
151 die "Unable to open $file for reading: $!";
154 foreach my $remove (@{$options{remove_name}}) {
155 $ws_name =~ s{$remove}{}g;
157 $ws_name =~ s{\.[^\.]+$}{}g;
158 $ws_name =~ s/_+/ /g;
159 $ws_name =~ s{[\]:*?\/\] ]+}{ }g;
160 $ws_name =~ s{(?:^\s+|\s+$)}{}g;
161 $ws_name =~ s{^(.{0,31}).*$}{$1};
162 my $ws = $wb->add_worksheet($ws_name) or
163 die "Unable to add worksheet to workbook";
167 my $r_mode = $options{r_mode} // 0;
168 # set to 1 if we have attempted to autodetect R mode
169 my $r_mode_autodetected = 0;
173 die "Unable to parse line $. of $file: ".$csv->error_diag() unless $csv->parse($_);
174 my @row = $csv->fields();
175 if ($row==1 and not $r_mode_autodetected) {
180 if ($row==2 and not $r_mode_autodetected) {
181 $r_mode_autodetected = 1;
182 if (@row == (@header_row+1)) {
183 $r_mode = 1 unless exists $options{r_mode} and defined $options{r_mode};
186 # R doesn't output headers for rownames
187 unshift @header_row,'';
189 output_row(\@header_row,1,$ws,$wb,\%wb_formats,\%options);
191 if ($row > 65536) { # ok, we're going to overflow here
192 my $t_ws_name = $ws_name;
193 my $maxlen = 31-length('.'.$overflow);
194 $t_ws_name =~ s{^(.{0,$maxlen}).*$}{$1};
195 $ws = $wb->add_worksheet($t_ws_name.'.'.$overflow);
198 output_row(\@header_row,$row,$ws,$wb,\%wb_formats,\%options);
204 output_row(\@row,$row,$ws,$wb,\%wb_formats,\%options);
210 my ($data,$row,$ws,$wb,$formats,$options) = @_;
211 my @columns = ('A'..'Z','AA'..'ZZ');
212 for my $i (0..$#{$data}) {
214 if ($options->{auto_format}) {
215 if (looks_like_number($data->[$i])) {
217 # use scientific format?
218 if ($data->[$i] != 0 && (abs($data->[$i]) > 9999 ||
219 abs($data->[$i]) < 0.001)) {
220 $format_string = $options->{sci_format};
222 $format_string = '0';
223 my $digits = length(floor(abs($data->[$i])));
224 if ($options->{max_digits} - $digits > 0 and
225 abs($data->[$i]) != abs(floor($data->[$i]))) {
226 # if there are digits left over, use them for
227 # decimal places, but don't require them
228 $format_string = $format_string.'.'.('#'x($options->{max_digits} - $digits))
231 if (not exists $formats->{$format_string}) {
232 $formats->{$format_string} =
234 $formats->{$format_string}->
235 set_num_format($format_string);
237 $format = $formats->{$format_string};
240 $ws->write($columns[$i].$row,$data->[$i],defined $format?$format:())