From 2c0c4213b2fb9961ac63139d22dcefe701d076d6 Mon Sep 17 00:00:00 2001 From: Don Armstrong Date: Mon, 24 Jun 2013 20:16:03 -0700 Subject: [PATCH] allow delimiter to be a regex; use regex per column --- col_grep | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/col_grep b/col_grep index 22d6464..9f181cb 100755 --- a/col_grep +++ b/col_grep @@ -102,8 +102,7 @@ if ($options{has_header}) { my @field_indexes = map { looks_like_number($_) && $_ > 0 ? $_ - 1 : $_;} map {split /,/} @{$options{field}}; my %field_indexes; -@field_indexes{@field_indexes} = @field_indexes; -@field_indexes = values %field_indexes; +@field_indexes = map {++$field_indexes{$_} > 1 ? ():$_} @field_indexes; if (grep {not /-?\d+/} @field_indexes and not $options{has_header}) { push @USAGE_ERRORS,"Invalid field index(es)"; @@ -120,7 +119,13 @@ if (not @ARGV) { push @ARGV,undef; } my %compiled_regexes; -my $csv = Text::CSV->new({sep_char=>$options{delimiter}}); +my $csv; +if (length($options{delimiter}) > 1) { # delimiter is a regex, then + #csv will be undef +} else { + $csv = Text::CSV->new({sep_char=>$options{delimiter}}) or + die "Unable to start Text::CSV"; +} FILE: for my $file (@ARGV) { my $fh; my $headers_updated = 0; @@ -134,8 +139,13 @@ FILE: for my $file (@ARGV) { LINE: while (<$fh>) { my $chomped = chomp; my $line = $_; - die "Unable to parse line $. 
of $file: ".$csv->error_diag() unless $csv->parse($_); + @fields = $csv->fields(); + } else { + @fields = split /$options{delimiter}/o,$_; + } # skip lines which don't have enough fields if ($options{has_header} and not @header) { @header = @fields; @@ -145,7 +155,7 @@ FILE: for my $file (@ARGV) { next LINE; } if ($options{has_header} and not $headers_updated) { - $headers_updated = 0; + $headers_updated = 1; if (@header < @fields) { @header{@header} = 1..@fields; } @@ -154,7 +164,7 @@ FILE: for my $file (@ARGV) { push @new_indexes,$index and next if $index =~ /^-?\d+$/; if (not exists $header{$index}) { use Data::Dumper; - print STDERR Dumper(\%header); + print STDERR Data::Dumper->Dump([\%header],[qw(*header)]); print STDERR "Invalid header $index\n"; exit 1; } else { @@ -162,16 +172,25 @@ FILE: for my $file (@ARGV) { } } @field_indexes = @new_indexes; + print STDERR Data::Dumper->Dump([\@field_indexes],[qw(*field_indexes)]) if $DEBUG; } next LINE if grep {not defined $_} @fields[@field_indexes]; + my $i = -1; REGEX: for my $regex (@{$options{regexp}}) { - FIELDS: for my $field (@fields[@field_indexes]) { - if (length $regex) { + $i++; + if (length $regex) { + my @fields_to_examine = map {$fields[$_]} @field_indexes; + if (@{$options{regexp}} > 1) { + @fields_to_examine = $fields_to_examine[$i]; + } + FIELDS: for my $field (@fields_to_examine) { if (not exists $compiled_regexes{$regex}) { $compiled_regexes{$regex} = qr/$regex/; } - $field =~ $compiled_regexes{$regex} or next LINE; + print STDERR "regex: $regex field: $field\n" if $DEBUG; + $field =~ $compiled_regexes{$regex} and next REGEX; } + next LINE; } } print $line; -- 2.39.2