#! /usr/bin/perl # transpose_table transposes tables, and is released # under the terms of the GPL version 2, or any later version, at your # option. See the file README and COPYING for more information. # Copyright 2011 by Don Armstrong . # $Id: perl_script 1825 2011-01-02 01:53:43Z don $ use warnings; use strict; use Getopt::Long; use Pod::Usage; =head1 NAME transpose_table - Transposes a table =head1 SYNOPSIS [options] Options: --tsv, -t tab separated value mode (Default) --ssv, -s space separated value mode --csv, -c comma separated value mode --bigfile,-b Big file; use disk --debug, -d debugging level (Default 0) --help, -h display this help --man, -m display manual =head1 OPTIONS =over =item B<--debug, -d> Debug verbosity. (Default 0) =item B<--help, -h> Display brief usage information. =item B<--man, -m> Display this manual. =back =head1 EXAMPLES =cut use vars qw($DEBUG); use Fcntl qw(:seek); use File::Temp qw(tempdir); use MLDBM qw(DB_File Storable); use Devel::Peek; use List::Util qw(max); my %options = (debug => 0, help => 0, man => 0, bigfile => 0, ); GetOptions(\%options, 'tsv|t', 'ssv|s', 'csv|c', 'bigfile', 'debug|d+','help|h|?','man|m'); pod2usage() if $options{help}; pod2usage({verbose=>2}) if $options{man}; $DEBUG = $options{debug}; my @USAGE_ERRORS; if (0 == grep {exists $options{$_}} qw(tsv ssv csv)) { $options{tsv} = 1 } if (1 < grep {exists $options{$_}} qw(tsv ssv csv)) { push @USAGE_ERRORS,"You can only pass one of --tsv, --ssv, or --csv"; } if (not @ARGV) { # we'll use this as a special indicator to read stdin push @ARGV,undef; } if (@ARGV != 1) { push @USAGE_ERRORS,"Exactly one file must be specified"; } pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS; my $sep_char = "\t"; if ($options{csv}) { $sep_char = ','; } elsif ($options{ssv}) { $sep_char = ' '; } my @row; for my $file (@ARGV) { my $in; if (not defined $file) { ...; # not supported yet; STDIN isn't seekable $in = \*STDIN; $file = "STDIN"; } else { $in = IO::File->new($file,'r') or die "Unable to open $file for reading: $!"; } my $out = \*STDOUT; my $first_time = 1; my $cur_row = 0; my $end; my $first_row_end; do { if (not defined $end) { $in->seek(0,SEEK_END); $end = $in->tell; $in->seek(0,SEEK_SET); } # from the current position, advance to complete the next field my $next_field = advance_to_field($in,$sep_char); # if we're at the end of the file, stop. if (not $first_time and $cur_row == 0) { print {$out} "\n"; } elsif ($cur_row != 0) { print {$out} $sep_char; } # write it to the output file print {$out} $next_field; if ($in->eof) { # avoid writing out a newline if the file was totally empty to start with print {$out} "\n" if not $first_time; last; } # if this is the first time through, store this position for # this row, then find the end of the row [field with a \n as a # terminator] and do the next loop; if we hit the end of the # file, we are no longer the first time through. $row[$cur_row] = $in->tell; if ($first_time) { print STDERR "\r$cur_row"; $cur_row++; advance_to_field($in,"\n"); $first_row_end = $in->tell if not defined $first_row_end; if ($in->eof()) { $first_time = 0; $in->seek($row[0],SEEK_SET); $cur_row = 0; } } else { # otherwise, advance to the next row's position $cur_row = ($cur_row + 1) % @row; $in->seek($row[$cur_row],SEEK_SET); } if ($cur_row == 0) { print STDERR "\r".$in->tell."/$first_row_end"; } } while (1); print STDERR "\n"; } sub advance_to_field { my ($fh,$sep) = @_; my $escaped = 0; my $char; my $return; do { $char = $fh->getc(); if ($char eq '"') { $escaped = $escaped ? 0 : 1; } if (not $escaped and ($char eq $sep or $char eq "\n")) { return $return; } $return .= $char; } while (1); } __END__