allow delimiter to be a regex; use regex per column

author Don Armstrong <don@donarmstrong.com>

Tue, 25 Jun 2013 03:16:03 +0000 (20:16 -0700)

committer Don Armstrong <don@donarmstrong.com>

Tue, 25 Jun 2013 03:16:03 +0000 (20:16 -0700)
author Don Armstrong <don@donarmstrong.com>
Tue, 25 Jun 2013 03:16:03 +0000 (20:16 -0700)
committer Don Armstrong <don@donarmstrong.com>
Tue, 25 Jun 2013 03:16:03 +0000 (20:16 -0700)
diff --git a/col_grep b/col_grep

index 22d6464808c94c437883ca34e5c96c8ec4f4960b..9f181cb16998cb04523ace37c2273f9a1c46be68 100755 (executable)
--- a/col_grep
+++ b/col_grep
@@ -102,8 +102,7 @@ if ($options{has_header}) {
  my @field_indexes = map { looks_like_number($_) && $_ > 0 ? $_ - 1 : $_;}
      map {split /,/} @{$options{field}};
  my %field_indexes;
-@field_indexes{@field_indexes} = @field_indexes;
-@field_indexes = values %field_indexes;
+@field_indexes = map {++$field_indexes{$_} > 1 ? ():$_} @field_indexes;
  
  if (grep {not /-?\d+/} @field_indexes and not $options{has_header}) {
      push @USAGE_ERRORS,"Invalid field index(es)";
@@ -120,7 +119,13 @@ if (not @ARGV) {
      push @ARGV,undef;
  }
  my %compiled_regexes;
-my $csv = Text::CSV->new({sep_char=>$options{delimiter}});
+my $csv;
+if (length($options{delimiter}) > 1) { # delimiter is a regex, then
+    #csv will be undef
+} else {
+    $csv = Text::CSV->new({sep_char=>$options{delimiter}}) or
+        die "Unable to start Text::CSV";
+} 
  FILE: for my $file (@ARGV) {
      my $fh;
      my $headers_updated = 0;
@@ -134,8 +139,13 @@ FILE: for my $file (@ARGV) {
   LINE: while (<$fh>) {
          my $chomped = chomp;
          my $line = $_;
-        die "Unable to parse line $. of $file: ".$csv->error_diag() unless $csv->parse($_);
-        my @fields = $csv->fields();
+        my @fields;
+        if (defined $csv) {
+            die "Unable to parse line $. of $file: ".$csv->error_diag() unless $csv->parse($_);
+            @fields = $csv->fields();
+        } else {
+            @fields = split /$options{delimiter}/o,$_;
+        }
          # skip lines which don't have enough fields
          if ($options{has_header} and not @header) {
              @header = @fields;
@@ -145,7 +155,7 @@ FILE: for my $file (@ARGV) {
              next LINE;
          }
          if ($options{has_header} and not $headers_updated) {
-            $headers_updated = 0;
+            $headers_updated = 1;
              if (@header < @fields) {
                  @header{@header} = 1..@fields;
              }
@@ -154,7 +164,7 @@ FILE: for my $file (@ARGV) {
                  push @new_indexes,$index and next if $index =~ /^-?\d+$/;
                  if (not exists $header{$index}) {
                      use Data::Dumper;
-                    print STDERR Dumper(\%header);
+                    print STDERR Data::Dumper->Dump([\%header],[qw(*header)]);
                      print STDERR "Invalid header $index\n";
                      exit 1;
                  } else {
@@ -162,16 +172,25 @@ FILE: for my $file (@ARGV) {
                  }
              }
              @field_indexes = @new_indexes;
+            print STDERR Data::Dumper->Dump([\@field_indexes],[qw(*field_indexes)]) if $DEBUG;
          }
          next LINE if grep {not defined $_} @fields[@field_indexes];
+        my $i = -1;
      REGEX: for my $regex (@{$options{regexp}}) {
-        FIELDS: for my $field (@fields[@field_indexes]) {
-                if (length $regex) {
+            $i++;
+            if (length $regex) {
+                my @fields_to_examine = map {$fields[$_]} @field_indexes;
+                if (@{$options{regexp}} > 1) {
+                    @fields_to_examine = $fields_to_examine[$i];
+                }
+            FIELDS: for my $field (@fields_to_examine) {
                      if (not exists $compiled_regexes{$regex}) {
                          $compiled_regexes{$regex} = qr/$regex/;
                      }
-                    $field =~ $compiled_regexes{$regex} or next LINE;
+                    print STDERR "regex: $regex field: $field\n" if $DEBUG;
+                    $field =~ $compiled_regexes{$regex} and next REGEX;
                  }
+                next LINE;
              }
          }
          print $line;
author	Don Armstrong <don@donarmstrong.com>
	Tue, 25 Jun 2013 03:16:03 +0000 (20:16 -0700)
committer	Don Armstrong <don@donarmstrong.com>
	Tue, 25 Jun 2013 03:16:03 +0000 (20:16 -0700)