X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=col_grep;h=9f181cb16998cb04523ace37c2273f9a1c46be68;hb=1886c7247cb16ed68a78cd5550b152689a465999;hp=95ba3a7c6f09a5d92b20a2ece7e6ebbfd999ef9b;hpb=9f3e5f836f8f095eab173bdde451bb0f8eaa078b;p=bin.git diff --git a/col_grep b/col_grep index 95ba3a7..9f181cb 100755 --- a/col_grep +++ b/col_grep @@ -59,22 +59,25 @@ Display this manual. =cut - +use Scalar::Util qw(looks_like_number); +use Text::CSV; use vars qw($DEBUG); my %options = (debug => 0, - help => 0, - man => 0, - delimiter => "\t", - field => [], - regexp => [], + help => 0, + man => 0, + delimiter => "\t", + field => [], + regexp => [], + has_header => 0, ); GetOptions(\%options, - 'delimiter=s', - 'field|f=s@', - 'regexp|e=s@', - 'debug|d+','help|h|?','man|m'); + 'delimiter=s', + 'field|f=s@', + 'regexp|e=s@', + 'has_header|has-header!', + 'debug|d+','help|h|?','man|m'); pod2usage() if $options{help}; pod2usage({verbose=>2}) if $options{man}; @@ -90,13 +93,18 @@ if (not @{$options{field}}) { $options{field} = [1]; } -my @field_indexes = map { $_ > 0 ? $_ - 1 : $_;} +my @header; +my %header; +if ($options{has_header}) { + # we allow for a header +} + +my @field_indexes = map { looks_like_number($_) && $_ > 0 ? $_ - 1 : $_;} map {split /,/} @{$options{field}}; my %field_indexes; -@field_indexes{@field_indexes} = @field_indexes; -@field_indexes = values %field_indexes; +@field_indexes = map {++$field_indexes{$_} > 1 ? ():$_} @field_indexes; -if (grep {not /-?\d+/} @field_indexes) { +if (grep {not /-?\d+/} @field_indexes and not $options{has_header}) { push @USAGE_ERRORS,"Invalid field index(es)"; } @@ -106,26 +114,88 @@ if (not @{$options{regexp}} and @ARGV) { push @{$options{regexp}}, shift @ARGV; } +if (not @ARGV) { + # we'll use this as a special indicator to read stdin + push @ARGV,undef; +} my %compiled_regexes; -LINE: while (<>) { - my $chomped = chomp; - my $line = $_; - my @fields = split /$options{delimiter}/; - # skip lines which don't have enough fields - next LINE if grep {not defined $_} @fields[@field_indexes]; - REGEX: for my $regex (@{$options{regexp}}) { - FIELDS: for my $field (@fields[@field_indexes]) { - if (length $regex) { - if (not exists $compiled_regexes{$regex}) { - $compiled_regexes{$regex} = qr/$regex/; - } - $field =~ $compiled_regexes{$regex} or next LINE; - } - } +my $csv; +if (length($options{delimiter}) > 1) { # delimiter is a regex, then + #csv will be undef +} else { + $csv = Text::CSV->new({sep_char=>$options{delimiter}}) or + die "Unable to start Text::CSV"; +} +FILE: for my $file (@ARGV) { + my $fh; + my $headers_updated = 0; + if (not defined $file) { + $fh = \*STDIN; + $file = "STDIN"; + } else { + $fh = IO::File->new($file,'r') or + die "Unable to open $file for reading: $!"; + } + LINE: while (<$fh>) { + my $chomped = chomp; + my $line = $_; + my @fields; + if (defined $csv) { + die "Unable to parse line $. of $file: ".$csv->error_diag() unless $csv->parse($_); + @fields = $csv->fields(); + } else { + @fields = split /$options{delimiter}/o,$_; + } + # skip lines which don't have enough fields + if ($options{has_header} and not @header) { + @header = @fields; + @header{@header} = 0..$#fields; + print $line; + print $/ if $chomped; + next LINE; + } + if ($options{has_header} and not $headers_updated) { + $headers_updated = 1; + if (@header < @fields) { + @header{@header} = 1..@fields; + } + my @new_indexes; + for my $index (@field_indexes) { + push @new_indexes,$index and next if $index =~ /^-?\d+$/; + if (not exists $header{$index}) { + use Data::Dumper; + print STDERR Data::Dumper->Dump([\%header],[qw(*header)]); + print STDERR "Invalid header $index\n"; + exit 1; + } else { + push @new_indexes,$header{$index}; + } + } + @field_indexes = @new_indexes; + print STDERR Data::Dumper->Dump([\@field_indexes],[qw(*field_indexes)]) if $DEBUG; + } + next LINE if grep {not defined $_} @fields[@field_indexes]; + my $i = -1; + REGEX: for my $regex (@{$options{regexp}}) { + $i++; + if (length $regex) { + my @fields_to_examine = map {$fields[$_]} @field_indexes; + if (@{$options{regexp}} > 1) { + @fields_to_examine = $fields_to_examine[$i]; + } + FIELDS: for my $field (@fields_to_examine) { + if (not exists $compiled_regexes{$regex}) { + $compiled_regexes{$regex} = qr/$regex/; + } + print STDERR "regex: $regex field: $field\n" if $DEBUG; + $field =~ $compiled_regexes{$regex} and next REGEX; + } + next LINE; + } + } + print $line; + print $/ if $chomped; } - print $_; - print $/ if $chomped; } - __END__