2 # col_grep greps a column matching a pattern, and is released
3 # under the terms of the GPL version 2, or any later version, at your
4 # option. See the file README and COPYING for more information.
5 # Copyright 2008 by Don Armstrong <don@donarmstrong.com>.
6 # $Id: perl_script 1153 2008-04-08 00:04:20Z don $
17 col_grep - grep for a column matching a pattern
21 col_grep [options] PATTERN [FILE...]
22 col_grep [options] [-e PATTERN] [FILE...]
25 --delimiter, -d Delimiter to use (default tab)
26 --regexp, -e pattern to match
27 --fields, -f field number to match (default 1)
28 --help, -h display this help
29 --man, -m display manual
35 =item B<--delimiter,-d>
37 Delimiter to use, defaults to tab
45 Field number to match; starts at 1
49 Display brief useage information.
62 use Scalar::Util qw(looks_like_number);
66 my %options = (debug => 0,
79 'has_header|has-header!',
80 'debug|d+','help|h|?','man|m');
82 pod2usage() if $options{help};
83 pod2usage({verbose=>2}) if $options{man};
85 $DEBUG = $options{debug};
89 # push @USAGE_ERRORS,"You must pass something";
92 if (not @{$options{field}}) {
93 $options{field} = [1];
98 if ($options{has_header}) {
99 # we allow for a header
102 my @field_indexes = map { looks_like_number($_) && $_ > 0 ? $_ - 1 : $_;}
103 map {split /,/} @{$options{field}};
105 @field_indexes = map {++$field_indexes{$_} > 1 ? ():$_} @field_indexes;
107 if (grep {not /-?\d+/} @field_indexes and not $options{has_header}) {
108 push @USAGE_ERRORS,"Invalid field index(es)";
111 pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
113 if (not @{$options{regexp}} and @ARGV) {
114 push @{$options{regexp}}, shift @ARGV;
118 # we'll use this as a special indicator to read stdin
121 my %compiled_regexes;
123 if (length($options{delimiter}) > 1) { # delimiter is a regex, then
126 $csv = Text::CSV->new({sep_char=>$options{delimiter}}) or
127 die "Unable to start Text::CSV";
129 FILE: for my $file (@ARGV) {
131 my $headers_updated = 0;
132 if (not defined $file) {
136 $fh = IO::File->new($file,'r') or
137 die "Unable to open $file for reading: $!";
139 LINE: while (<$fh>) {
144 die "Unable to parse line $. of $file: ".$csv->error_diag() unless $csv->parse($_);
145 @fields = $csv->fields();
147 @fields = split /$options{delimiter}/o,$_;
149 # skip lines which don't have enough fields
150 if ($options{has_header} and not @header) {
152 @header{@header} = 0..$#fields;
154 print $/ if $chomped;
157 if ($options{has_header} and not $headers_updated) {
158 $headers_updated = 1;
159 if (@header < @fields) {
160 @header{@header} = 1..@fields;
163 for my $index (@field_indexes) {
164 push @new_indexes,$index and next if $index =~ /^-?\d+$/;
165 if (not exists $header{$index}) {
167 print STDERR Data::Dumper->Dump([\%header],[qw(*header)]);
168 print STDERR "Invalid header $index\n";
171 push @new_indexes,$header{$index};
174 @field_indexes = @new_indexes;
175 print STDERR Data::Dumper->Dump([\@field_indexes],[qw(*field_indexes)]) if $DEBUG;
177 next LINE if grep {not defined $_} @fields[@field_indexes];
179 REGEX: for my $regex (@{$options{regexp}}) {
182 my @fields_to_examine = map {$fields[$_]} @field_indexes;
183 if (@{$options{regexp}} > 1) {
184 @fields_to_examine = $fields_to_examine[$i];
186 FIELDS: for my $field (@fields_to_examine) {
187 if (not exists $compiled_regexes{$regex}) {
188 $compiled_regexes{$regex} = qr/$regex/;
190 print STDERR "regex: $regex field: $field\n" if $DEBUG;
191 $field =~ $compiled_regexes{$regex} and next REGEX;
197 print $/ if $chomped;