have update_org_files also pull

[bin.git] / col_grep
diff --git a/col_grep b/col_grep

index 95ba3a7c6f09a5d92b20a2ece7e6ebbfd999ef9b..9f181cb16998cb04523ace37c2273f9a1c46be68 100755 (executable)
--- a/col_grep
+++ b/col_grep
@@ -59,22 +59,25 @@ Display this manual.
  
  =cut
  
-
+use Scalar::Util qw(looks_like_number);
+use Text::CSV;
  use vars qw($DEBUG);
  
  my %options = (debug            => 0,
-              help             => 0,
-              man              => 0,
-              delimiter        => "\t",
-              field            => [],
-              regexp           => [],
+               help             => 0,
+               man              => 0,
+               delimiter        => "\t",
+               field            => [],
+               regexp           => [],
+               has_header       => 0,
               );
  
  GetOptions(\%options,
-          'delimiter=s',
-          'field|f=s@',
-          'regexp|e=s@',
-          'debug|d+','help|h|?','man|m');
+           'delimiter=s',
+           'field|f=s@',
+           'regexp|e=s@',
+           'has_header|has-header!',
+           'debug|d+','help|h|?','man|m');
  
  pod2usage() if $options{help};
  pod2usage({verbose=>2}) if $options{man};
@@ -90,13 +93,18 @@ if (not @{$options{field}}) {
      $options{field} = [1];
  }
  
-my @field_indexes = map { $_ > 0 ? $_ - 1 : $_;}
+my @header;
+my %header;
+if ($options{has_header}) {
+    # we allow for a header
+}
+
+my @field_indexes = map { looks_like_number($_) && $_ > 0 ? $_ - 1 : $_;}
      map {split /,/} @{$options{field}};
  my %field_indexes;
-@field_indexes{@field_indexes} = @field_indexes;
-@field_indexes = values %field_indexes;
+@field_indexes = map {++$field_indexes{$_} > 1 ? ():$_} @field_indexes;
  
-if (grep {not /-?\d+/} @field_indexes) {
+if (grep {not /-?\d+/} @field_indexes and not $options{has_header}) {
      push @USAGE_ERRORS,"Invalid field index(es)";
  }
  
@@ -106,26 +114,88 @@ if (not @{$options{regexp}} and @ARGV) {
      push @{$options{regexp}}, shift @ARGV;
  }
  
+if (not @ARGV) {
+    # we'll use this as a special indicator to read stdin
+    push @ARGV,undef;
+}
  my %compiled_regexes;
-LINE: while (<>) {
-    my $chomped = chomp;
-    my $line = $_;
-    my @fields = split /$options{delimiter}/;
-    # skip lines which don't have enough fields
-    next LINE if grep {not defined $_} @fields[@field_indexes];
- REGEX: for my $regex (@{$options{regexp}}) {
-    FIELDS: for my $field (@fields[@field_indexes]) {
-           if (length $regex) {
-               if (not exists $compiled_regexes{$regex}) {
-                   $compiled_regexes{$regex} = qr/$regex/;
-               }
-               $field =~ $compiled_regexes{$regex} or next LINE;
-           }
-       }
+my $csv;
+if (length($options{delimiter}) > 1) { # delimiter is a regex, then
+    #csv will be undef
+} else {
+    $csv = Text::CSV->new({sep_char=>$options{delimiter}}) or
+        die "Unable to start Text::CSV";
+} 
+FILE: for my $file (@ARGV) {
+    my $fh;
+    my $headers_updated = 0;
+    if (not defined $file) {
+        $fh = \*STDIN;
+        $file = "STDIN";
+    } else {
+        $fh = IO::File->new($file,'r') or
+            die "Unable to open $file for reading: $!";
+    }
+ LINE: while (<$fh>) {
+        my $chomped = chomp;
+        my $line = $_;
+        my @fields;
+        if (defined $csv) {
+            die "Unable to parse line $. of $file: ".$csv->error_diag() unless $csv->parse($_);
+            @fields = $csv->fields();
+        } else {
+            @fields = split /$options{delimiter}/o,$_;
+        }
+        # skip lines which don't have enough fields
+        if ($options{has_header} and not @header) {
+            @header = @fields;
+            @header{@header} = 0..$#fields;
+            print $line;
+            print $/ if $chomped;
+            next LINE;
+        }
+        if ($options{has_header} and not $headers_updated) {
+            $headers_updated = 1;
+            if (@header < @fields) {
+                @header{@header} = 1..@fields;
+            }
+            my @new_indexes;
+            for my $index (@field_indexes) {
+                push @new_indexes,$index and next if $index =~ /^-?\d+$/;
+                if (not exists $header{$index}) {
+                    use Data::Dumper;
+                    print STDERR Data::Dumper->Dump([\%header],[qw(*header)]);
+                    print STDERR "Invalid header $index\n";
+                    exit 1;
+                } else {
+                    push @new_indexes,$header{$index};
+                }
+            }
+            @field_indexes = @new_indexes;
+            print STDERR Data::Dumper->Dump([\@field_indexes],[qw(*field_indexes)]) if $DEBUG;
+        }
+        next LINE if grep {not defined $_} @fields[@field_indexes];
+        my $i = -1;
+    REGEX: for my $regex (@{$options{regexp}}) {
+            $i++;
+            if (length $regex) {
+                my @fields_to_examine = map {$fields[$_]} @field_indexes;
+                if (@{$options{regexp}} > 1) {
+                    @fields_to_examine = $fields_to_examine[$i];
+                }
+            FIELDS: for my $field (@fields_to_examine) {
+                    if (not exists $compiled_regexes{$regex}) {
+                        $compiled_regexes{$regex} = qr/$regex/;
+                    }
+                    print STDERR "regex: $regex field: $field\n" if $DEBUG;
+                    $field =~ $compiled_regexes{$regex} and next REGEX;
+                }
+                next LINE;
+            }
+        }
+        print $line;
+        print $/ if $chomped;
      }
-    print $_;
-    print $/ if $chomped;
  }
  
-
  __END__