2 # txt2xls turns text files into excel workbooks, and is released
3 # under the terms of the GPL version 2, or any later version, at your
4 # option. See the file README and COPYING for more information.
5 # Copyright 2008 by Don Armstrong <don@donarmstrong.com>.
6 # $Id: perl_script 1153 2008-04-08 00:04:20Z don $
17 txt2xls - Turns a (set of) text file(s) into an excel workbook
24 --tsv, -t tab separated value mode (Default)
25 --ssv, -s space separated value mode
26 --csv, -c comma separated value mode
27 --r-mode, -r R mode (Default)
28 --debug, -d debugging level (Default 0)
29 --help, -h display this help
30 --man, -m display manual
38 Debug verbosity. (Default 0)
42 Display brief useage information.
59 use Spreadsheet::WriteExcel;
61 my %options = (debug => 0,
72 'remove_name|remove-name=s@',
73 'debug|d+','help|h|?','man|m');
75 pod2usage() if $options{help};
76 pod2usage({verbose=>2}) if $options{man};
78 $DEBUG = $options{debug};
81 if (0 == grep {exists $options{$_}} qw(tsv ssv csv)) {
84 if (1 < grep {exists $options{$_}} qw(tsv ssv csv)) {
85 push @USAGE_ERRORS,"You can only pass one of --tsv, --ssv, or --csv";
88 pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
91 # we'll use this as a special indicator to read stdin
99 elsif ($options{ssv}) {
103 my $csv = Text::CSV->new({sep_char=>$sep_char});
104 my $wb = Spreadsheet::WriteExcel->new(\*STDOUT);
105 for my $file (@ARGV) {
107 if (not defined $file) {
112 $fh = IO::File->new($file,'r') or
113 die "Unable to open $file for reading: $!";
116 foreach my $remove (@{$options{remove_name}}) {
117 $ws_name =~ s{\Q$remove\E}{}g;
119 $ws_name =~ s{\.[^\.]+$}{}g;
120 $ws_name =~ s/_+/ /g;
121 $ws_name =~ s{[\]:*?\/\] ]+}{ }g;
122 $ws_name =~ s{(?:^\s+|\s+$)}{}g;
123 $ws_name =~ s{^(.{0,31}).*$}{$1};
124 my $ws = $wb->add_worksheet($ws_name) or
125 die "Unable to add worksheet to workbook";
129 my $r_mode = $options{r_mode} // 0;
130 # set to 1 if we have attempted to autodetect R mode
131 my $r_mode_autodetected = 0;
135 die "Unable to parse line $. of $file" unless $csv->parse($_);
136 my @row = $csv->fields();
137 if ($row==1 and not $r_mode_autodetected) {
142 if ($row==2 and not $r_mode_autodetected) {
143 $r_mode_autodetected = 1;
144 if (@row == (@header_row+1)) {
145 $r_mode = 1 unless exists $options{r_mode} and defined $options{r_mode};
148 # R doesn't output headers for rownames
149 unshift @header_row,'';
151 output_row(\@header_row,1,$ws);
153 if ($row > 65536) { # ok, we're going to overflow here
154 my $t_ws_name = $ws_name;
155 my $maxlen = 31-length('.'.$overflow);
156 $t_ws_name =~ s{^(.{0,$maxlen}).*$}{$1};
157 $ws = $wb->add_worksheet($ws_name.'.'.$overflow);
159 output_row(\@header_row,$row,$ws);
165 output_row(\@row,$row,$ws);
171 my ($data,$row,$ws) = @_;
172 my @columns = ('A'..'Z','AA'..'ZZ');
173 for my $i (0..$#{$data}) {
174 $ws->write($columns[$i].$row,$data->[$i]);