From 0606f5ba62903c0e3042602a9243822e61fe289f Mon Sep 17 00:00:00 2001 From: Don Armstrong Date: Wed, 10 Apr 2013 09:19:53 -0700 Subject: [PATCH] allow selecting by header name if --has-header is given --- col_grep | 111 ++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 81 insertions(+), 30 deletions(-) diff --git a/col_grep b/col_grep index 95ba3a7..22d6464 100755 --- a/col_grep +++ b/col_grep @@ -59,22 +59,25 @@ Display this manual. =cut - +use Scalar::Util qw(looks_like_number); +use Text::CSV; use vars qw($DEBUG); my %options = (debug => 0, - help => 0, - man => 0, - delimiter => "\t", - field => [], - regexp => [], + help => 0, + man => 0, + delimiter => "\t", + field => [], + regexp => [], + has_header => 0, ); GetOptions(\%options, - 'delimiter=s', - 'field|f=s@', - 'regexp|e=s@', - 'debug|d+','help|h|?','man|m'); + 'delimiter=s', + 'field|f=s@', + 'regexp|e=s@', + 'has_header|has-header!', + 'debug|d+','help|h|?','man|m'); pod2usage() if $options{help}; pod2usage({verbose=>2}) if $options{man}; @@ -90,13 +93,19 @@ if (not @{$options{field}}) { $options{field} = [1]; } -my @field_indexes = map { $_ > 0 ? $_ - 1 : $_;} +my @header; +my %header; +if ($options{has_header}) { + # we allow for a header +} + +my @field_indexes = map { looks_like_number($_) && $_ > 0 ? $_ - 1 : $_;} map {split /,/} @{$options{field}}; my %field_indexes; @field_indexes{@field_indexes} = @field_indexes; @field_indexes = values %field_indexes; -if (grep {not /-?\d+/} @field_indexes) { +if (grep {not /-?\d+/} @field_indexes and not $options{has_header}) { push @USAGE_ERRORS,"Invalid field index(es)"; } @@ -106,26 +115,68 @@ if (not @{$options{regexp}} and @ARGV) { push @{$options{regexp}}, shift @ARGV; } +if (not @ARGV) { + # we'll use this as a special indicator to read stdin + push @ARGV,undef; +} my %compiled_regexes; -LINE: while (<>) { - my $chomped = chomp; - my $line = $_; - my @fields = split /$options{delimiter}/; - # skip lines which don't have enough fields - next LINE if grep {not defined $_} @fields[@field_indexes]; - REGEX: for my $regex (@{$options{regexp}}) { - FIELDS: for my $field (@fields[@field_indexes]) { - if (length $regex) { - if (not exists $compiled_regexes{$regex}) { - $compiled_regexes{$regex} = qr/$regex/; - } - $field =~ $compiled_regexes{$regex} or next LINE; - } - } +my $csv = Text::CSV->new({sep_char=>$options{delimiter}}); +FILE: for my $file (@ARGV) { + my $fh; + my $headers_updated = 0; + if (not defined $file) { + $fh = \*STDIN; + $file = "STDIN"; + } else { + $fh = IO::File->new($file,'r') or + die "Unable to open $file for reading: $!"; + } + LINE: while (<$fh>) { + my $chomped = chomp; + my $line = $_; + die "Unable to parse line $. of $file: ".$csv->error_diag() unless $csv->parse($_); + my @fields = $csv->fields(); + # skip lines which don't have enough fields + if ($options{has_header} and not @header) { + @header = @fields; + @header{@header} = 0..$#fields; + print $line; + print $/ if $chomped; + next LINE; + } + if ($options{has_header} and not $headers_updated) { + $headers_updated = 0; + if (@header < @fields) { + @header{@header} = 1..@fields; + } + my @new_indexes; + for my $index (@field_indexes) { + push @new_indexes,$index and next if $index =~ /^-?\d+$/; + if (not exists $header{$index}) { + use Data::Dumper; + print STDERR Dumper(\%header); + print STDERR "Invalid header $index\n"; + exit 1; + } else { + push @new_indexes,$header{$index}; + } + } + @field_indexes = @new_indexes; + } + next LINE if grep {not defined $_} @fields[@field_indexes]; + REGEX: for my $regex (@{$options{regexp}}) { + FIELDS: for my $field (@fields[@field_indexes]) { + if (length $regex) { + if (not exists $compiled_regexes{$regex}) { + $compiled_regexes{$regex} = qr/$regex/; + } + $field =~ $compiled_regexes{$regex} or next LINE; + } + } + } + print $line; + print $/ if $chomped; } - print $_; - print $/ if $chomped; } - __END__ -- 2.39.2