From: Don Armstrong Date: Fri, 27 Jun 2008 03:37:58 +0000 (+0000) Subject: add col_grep function X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=9f3e5f836f8f095eab173bdde451bb0f8eaa078b;p=bin.git add col_grep function --- diff --git a/col_grep b/col_grep new file mode 100755 index 0000000..95ba3a7 --- /dev/null +++ b/col_grep @@ -0,0 +1,131 @@ +#! /usr/bin/perl +# col_grep greps a column matching a pattern, and is released +# under the terms of the GPL version 2, or any later version, at your +# option. See the file README and COPYING for more information. +# Copyright 2008 by Don Armstrong . +# $Id: perl_script 1153 2008-04-08 00:04:20Z don $ + + +use warnings; +use strict; + +use Getopt::Long; +use Pod::Usage; + +=head1 NAME + +col_grep - grep for a column matching a pattern + +=head1 SYNOPSIS + +col_grep [options] PATTERN [FILE...] +col_grep [options] [-e PATTERN] [FILE...] + + Options: + --delimiter, -d Delimiter to use (default tab) + --regexp, -e pattern to match + --fields, -f field number to match (default 1) + --help, -h display this help + --man, -m display manual + +=head1 OPTIONS + +=over + +=item B<--delimiter,-d> + +Delimiter to use, defaults to tab + +=item B<--regexp, -e> + +Pattern to match + +=item B<--fields, -f> + +Field number to match; starts at 1 + +=item B<--help, -h> + +Display brief useage information. + +=item B<--man, -m> + +Display this manual. + +=back + +=head1 EXAMPLES + + +=cut + + +use vars qw($DEBUG); + +my %options = (debug => 0, + help => 0, + man => 0, + delimiter => "\t", + field => [], + regexp => [], + ); + +GetOptions(\%options, + 'delimiter=s', + 'field|f=s@', + 'regexp|e=s@', + 'debug|d+','help|h|?','man|m'); + +pod2usage() if $options{help}; +pod2usage({verbose=>2}) if $options{man}; + +$DEBUG = $options{debug}; + +my @USAGE_ERRORS; +# if (1) { +# push @USAGE_ERRORS,"You must pass something"; +# } + +if (not @{$options{field}}) { + $options{field} = [1]; +} + +my @field_indexes = map { $_ > 0 ? $_ - 1 : $_;} + map {split /,/} @{$options{field}}; +my %field_indexes; +@field_indexes{@field_indexes} = @field_indexes; +@field_indexes = values %field_indexes; + +if (grep {not /-?\d+/} @field_indexes) { + push @USAGE_ERRORS,"Invalid field index(es)"; +} + +pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS; + +if (not @{$options{regexp}} and @ARGV) { + push @{$options{regexp}}, shift @ARGV; +} + +my %compiled_regexes; +LINE: while (<>) { + my $chomped = chomp; + my $line = $_; + my @fields = split /$options{delimiter}/; + # skip lines which don't have enough fields + next LINE if grep {not defined $_} @fields[@field_indexes]; + REGEX: for my $regex (@{$options{regexp}}) { + FIELDS: for my $field (@fields[@field_indexes]) { + if (length $regex) { + if (not exists $compiled_regexes{$regex}) { + $compiled_regexes{$regex} = qr/$regex/; + } + $field =~ $compiled_regexes{$regex} or next LINE; + } + } + } + print $_; + print $/ if $chomped; +} + + +__END__