#! /usr/bin/perl # col_grep greps a column matching a pattern, and is released # under the terms of the GPL version 2, or any later version, at your # option. See the file README and COPYING for more information. # Copyright 2008 by Don Armstrong . # $Id: perl_script 1153 2008-04-08 00:04:20Z don $ use warnings; use strict; use Getopt::Long; use Pod::Usage; =head1 NAME col_grep - grep for a column matching a pattern =head1 SYNOPSIS col_grep [options] PATTERN [FILE...] col_grep [options] [-e PATTERN] [FILE...] Options: --delimiter, -d Delimiter to use (default tab) --regexp, -e pattern to match --fields, -f field number to match (default 1) --help, -h display this help --man, -m display manual =head1 OPTIONS =over =item B<--delimiter,-d> Delimiter to use, defaults to tab =item B<--regexp, -e> Pattern to match =item B<--fields, -f> Field number to match; starts at 1 =item B<--help, -h> Display brief useage information. =item B<--man, -m> Display this manual. =back =head1 EXAMPLES =cut use Scalar::Util qw(looks_like_number); use Text::CSV; use vars qw($DEBUG); my %options = (debug => 0, help => 0, man => 0, delimiter => "\t", field => [], regexp => [], has_header => 0, ); GetOptions(\%options, 'delimiter=s', 'field|f=s@', 'regexp|e=s@', 'has_header|has-header!', 'debug|d+','help|h|?','man|m'); pod2usage() if $options{help}; pod2usage({verbose=>2}) if $options{man}; $DEBUG = $options{debug}; my @USAGE_ERRORS; # if (1) { # push @USAGE_ERRORS,"You must pass something"; # } if (not @{$options{field}}) { $options{field} = [1]; } my @header; my %header; if ($options{has_header}) { # we allow for a header } my @field_indexes = map { looks_like_number($_) && $_ > 0 ? $_ - 1 : $_;} map {split /,/} @{$options{field}}; my %field_indexes; @field_indexes = map {++$field_indexes{$_} > 1 ? ():$_} @field_indexes; if (grep {not /-?\d+/} @field_indexes and not $options{has_header}) { push @USAGE_ERRORS,"Invalid field index(es)"; } pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS; if (not @{$options{regexp}} and @ARGV) { push @{$options{regexp}}, shift @ARGV; } if (not @ARGV) { # we'll use this as a special indicator to read stdin push @ARGV,undef; } my %compiled_regexes; my $csv; if (length($options{delimiter}) > 1) { # delimiter is a regex, then #csv will be undef } else { $csv = Text::CSV->new({sep_char=>$options{delimiter}}) or die "Unable to start Text::CSV"; } FILE: for my $file (@ARGV) { my $fh; my $headers_updated = 0; if (not defined $file) { $fh = \*STDIN; $file = "STDIN"; } else { $fh = IO::File->new($file,'r') or die "Unable to open $file for reading: $!"; } LINE: while (<$fh>) { my $chomped = chomp; my $line = $_; my @fields; if (defined $csv) { die "Unable to parse line $. of $file: ".$csv->error_diag() unless $csv->parse($_); @fields = $csv->fields(); } else { @fields = split /$options{delimiter}/o,$_; } # skip lines which don't have enough fields if ($options{has_header} and not @header) { @header = @fields; @header{@header} = 0..$#fields; print $line; print $/ if $chomped; next LINE; } if ($options{has_header} and not $headers_updated) { $headers_updated = 1; if (@header < @fields) { @header{@header} = 1..@fields; } my @new_indexes; for my $index (@field_indexes) { push @new_indexes,$index and next if $index =~ /^-?\d+$/; if (not exists $header{$index}) { use Data::Dumper; print STDERR Data::Dumper->Dump([\%header],[qw(*header)]); print STDERR "Invalid header $index\n"; exit 1; } else { push @new_indexes,$header{$index}; } } @field_indexes = @new_indexes; print STDERR Data::Dumper->Dump([\@field_indexes],[qw(*field_indexes)]) if $DEBUG; } next LINE if grep {not defined $_} @fields[@field_indexes]; my $i = -1; REGEX: for my $regex (@{$options{regexp}}) { $i++; if (length $regex) { my @fields_to_examine = map {$fields[$_]} @field_indexes; if (@{$options{regexp}} > 1) { @fields_to_examine = $fields_to_examine[$i]; } FIELDS: for my $field (@fields_to_examine) { if (not exists $compiled_regexes{$regex}) { $compiled_regexes{$regex} = qr/$regex/; } print STDERR "regex: $regex field: $field\n" if $DEBUG; $field =~ $compiled_regexes{$regex} and next REGEX; } next LINE; } } print $line; print $/ if $chomped; } } __END__