add transpose table

[bin.git] / transpose_table
diff --git a/transpose_table b/transpose_table

new file mode 100755 (executable)

index 0000000..1232fe2
--- /dev/null
+++ b/transpose_table
@@ -0,0 +1,159 @@
+#! /usr/bin/perl
+# transpose_table transposes tables, and is released
+# under the terms of the GPL version 2, or any later version, at your
+# option. See the file README and COPYING for more information.
+# Copyright 2011 by Don Armstrong <don@donarmstrong.com>.
+# $Id: perl_script 1825 2011-01-02 01:53:43Z don $
+
+
+use warnings;
+use strict;
+
+use Getopt::Long;
+use Pod::Usage;
+
+=head1 NAME
+
+transpose_table - Transposes a table
+
+=head1 SYNOPSIS
+
+ [options]
+
+ Options:
+  --tsv, -t tab separated value mode (Default)
+  --ssv, -s space separated value mode
+  --csv, -c comma separated value mode
+  --bigfile,-b Big file; use disk
+  --debug, -d debugging level (Default 0)
+  --help, -h display this help
+  --man, -m display manual
+
+=head1 OPTIONS
+
+=over
+
+=item B<--debug, -d>
+
+Debug verbosity. (Default 0)
+
+=item B<--help, -h>
+
+Display brief usage information.
+
+=item B<--man, -m>
+
+Display this manual.
+
+=back
+
+=head1 EXAMPLES
+
+
+=cut
+
+
+use vars qw($DEBUG);
+
+use Text::CSV;
+
+use File::Temp qw(tempdir);
+
+use MLDBM qw(DB_File Storable);
+use Fcntl;
+
+use List::Util qw(max);
+
+my %options = (debug           => 0,
+              help            => 0,
+              man             => 0,
+              bigfile         => 0,
+              );
+
+GetOptions(\%options,
+          'tsv|t',
+          'ssv|s',
+          'csv|c',
+          'bigfile',
+          'debug|d+','help|h|?','man|m');
+
+pod2usage() if $options{help};
+pod2usage({verbose=>2}) if $options{man};
+
+$DEBUG = $options{debug};
+
+my @USAGE_ERRORS;
+if (0 == grep {exists $options{$_}} qw(tsv ssv csv)) {
+    $options{tsv} = 1
+}
+if (1 < grep {exists $options{$_}} qw(tsv ssv csv)) {
+     push @USAGE_ERRORS,"You can only pass one of --tsv, --ssv, or --csv";
+}
+
+if (not @ARGV) {
+    # we'll use this as a special indicator to read stdin
+    push @ARGV,undef;
+}
+if (@ARGV != 1) {
+    push @USAGE_ERRORS,"Exactly one file must be specified";
+}
+
+pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
+
+my $sep_char = "\t";
+if ($options{csv}) {
+    $sep_char = ',';
+}
+elsif ($options{ssv}) {
+    $sep_char = ' ';
+}
+
+my $csv = Text::CSV->new({sep_char=>$sep_char});
+
+my %rows;
+my $n_rows = 0;
+
+if ($options{bigfile}) {
+    my $temp_dir = tempdir(CLEANUP=>1);
+    tie %rows,"MLDBM","$temp_dir/bigfile", O_CREAT|O_RDWR, 0600 or
+       die "Unable to tie '$temp_dir/bigfile': $!";
+}
+my $cols = 0;
+my $rows = 0;
+for my $file (@ARGV) {
+    my $fh;
+    if (not defined $file) {
+       $fh = \*STDIN;
+       $file = "STDIN";
+    }
+    else {
+       $fh = IO::File->new($file,'r') or
+           die "Unable to open $file for reading: $!";
+    }
+    while (<$fh>) {
+       chomp;
+       # parse the line
+       die "Unable to parse line $. of $file" unless $csv->parse($_);
+       my @row = $csv->fields();
+       $cols = max(scalar @row,$cols);
+       for my $i (0..$#row) {
+           $rows{$rows.'-'.$i} = $row[$i];
+       }
+       $rows++;
+       print STDERR "\rInput $rows rows";# if not ($rows-1) % 50;
+    }
+    print STDERR "\n";
+    $fh = \*STDOUT;
+    for my $i (0..($cols-1)) {
+       my @row;
+       for my $j (0..($rows-1)) {
+           push @row,$rows{$j.'-'.$i};
+       }
+       $csv->print($fh,\@row);
+       print "\n";
+       print STDERR "\rOutput ".($i+1)."/$cols rows";
+    }
+    print STDERR "\n";
+}
+
+__END__