#! /usr/bin/perl # transpose_table transposes tables, and is released # under the terms of the GPL version 2, or any later version, at your # option. See the file README and COPYING for more information. # Copyright 2011 by Don Armstrong . # $Id: perl_script 1825 2011-01-02 01:53:43Z don $ use warnings; use strict; use Getopt::Long; use Pod::Usage; =head1 NAME transpose_table - Transposes a table =head1 SYNOPSIS [options] Options: --tsv, -t tab separated value mode (Default) --ssv, -s space separated value mode --csv, -c comma separated value mode --bigfile,-b Big file; use disk --debug, -d debugging level (Default 0) --help, -h display this help --man, -m display manual =head1 OPTIONS =over =item B<--debug, -d> Debug verbosity. (Default 0) =item B<--help, -h> Display brief usage information. =item B<--man, -m> Display this manual. =back =head1 EXAMPLES =cut use vars qw($DEBUG); use Text::CSV; use File::Temp qw(tempdir); use MLDBM qw(DB_File Storable); use Fcntl; use List::Util qw(max); my %options = (debug => 0, help => 0, man => 0, bigfile => 0, ); GetOptions(\%options, 'tsv|t', 'ssv|s', 'csv|c', 'bigfile', 'debug|d+','help|h|?','man|m'); pod2usage() if $options{help}; pod2usage({verbose=>2}) if $options{man}; $DEBUG = $options{debug}; my @USAGE_ERRORS; if (0 == grep {exists $options{$_}} qw(tsv ssv csv)) { $options{tsv} = 1 } if (1 < grep {exists $options{$_}} qw(tsv ssv csv)) { push @USAGE_ERRORS,"You can only pass one of --tsv, --ssv, or --csv"; } if (not @ARGV) { # we'll use this as a special indicator to read stdin push @ARGV,undef; } if (@ARGV != 1) { push @USAGE_ERRORS,"Exactly one file must be specified"; } pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS; my $sep_char = "\t"; if ($options{csv}) { $sep_char = ','; } elsif ($options{ssv}) { $sep_char = ' '; } my $csv = Text::CSV->new({sep_char=>$sep_char}); my %rows; my $n_rows = 0; if ($options{bigfile}) { my $temp_dir = tempdir(CLEANUP=>1); tie %rows,"MLDBM","$temp_dir/bigfile", O_CREAT|O_RDWR, 0600 or die "Unable to tie '$temp_dir/bigfile': $!"; } my $cols = 0; my $rows = 0; for my $file (@ARGV) { my $fh; if (not defined $file) { $fh = \*STDIN; $file = "STDIN"; } else { $fh = IO::File->new($file,'r') or die "Unable to open $file for reading: $!"; } while (<$fh>) { chomp; # parse the line die "Unable to parse line $. of $file" unless $csv->parse($_); my @row = $csv->fields(); $cols = max(scalar @row,$cols); for my $i (0..$#row) { $rows{$rows.'-'.$i} = $row[$i]; } $rows++; print STDERR "\rInput $rows rows";# if not ($rows-1) % 50; } print STDERR "\n"; $fh = \*STDOUT; for my $i (0..($cols-1)) { my @row; for my $j (0..($rows-1)) { push @row,$rows{$j.'-'.$i}; } $csv->print($fh,\@row); print "\n"; print STDERR "\rOutput ".($i+1)."/$cols rows"; } print STDERR "\n"; } __END__