=cut
-
+use Scalar::Util qw(looks_like_number);
+use Text::CSV;
use vars qw($DEBUG);
my %options = (debug => 0,
- help => 0,
- man => 0,
- delimiter => "\t",
- field => [],
- regexp => [],
+ help => 0,
+ man => 0,
+ delimiter => "\t",
+ field => [],
+ regexp => [],
+ has_header => 0,
);
GetOptions(\%options,
- 'delimiter=s',
- 'field|f=s@',
- 'regexp|e=s@',
- 'debug|d+','help|h|?','man|m');
+ 'delimiter=s',
+ 'field|f=s@',
+ 'regexp|e=s@',
+ 'has_header|has-header!',
+ 'debug|d+','help|h|?','man|m');
pod2usage() if $options{help};
pod2usage({verbose=>2}) if $options{man};
$options{field} = [1];
}
-my @field_indexes = map { $_ > 0 ? $_ - 1 : $_;}
+my @header;
+my %header;
+if ($options{has_header}) {
+ # we allow for a header
+}
+
+my @field_indexes = map { looks_like_number($_) && $_ > 0 ? $_ - 1 : $_;}
map {split /,/} @{$options{field}};
my %field_indexes;
-@field_indexes{@field_indexes} = @field_indexes;
-@field_indexes = values %field_indexes;
+@field_indexes = map {++$field_indexes{$_} > 1 ? ():$_} @field_indexes;
-if (grep {not /-?\d+/} @field_indexes) {
+if (grep {not /-?\d+/} @field_indexes and not $options{has_header}) {
push @USAGE_ERRORS,"Invalid field index(es)";
}
push @{$options{regexp}}, shift @ARGV;
}
+if (not @ARGV) {
+ # we'll use this as a special indicator to read stdin
+ push @ARGV,undef;
+}
my %compiled_regexes;
-LINE: while (<>) {
- my $chomped = chomp;
- my $line = $_;
- my @fields = split /$options{delimiter}/;
- # skip lines which don't have enough fields
- next LINE if grep {not defined $_} @fields[@field_indexes];
- REGEX: for my $regex (@{$options{regexp}}) {
- FIELDS: for my $field (@fields[@field_indexes]) {
- if (length $regex) {
- if (not exists $compiled_regexes{$regex}) {
- $compiled_regexes{$regex} = qr/$regex/;
- }
- $field =~ $compiled_regexes{$regex} or next LINE;
- }
- }
+my $csv;
+if (length($options{delimiter}) > 1) { # delimiter is a regex, then
+ #csv will be undef
+} else {
+ $csv = Text::CSV->new({sep_char=>$options{delimiter}}) or
+ die "Unable to start Text::CSV";
+}
+FILE: for my $file (@ARGV) {
+ my $fh;
+ my $headers_updated = 0;
+ if (not defined $file) {
+ $fh = \*STDIN;
+ $file = "STDIN";
+ } else {
+ $fh = IO::File->new($file,'r') or
+ die "Unable to open $file for reading: $!";
+ }
+ LINE: while (<$fh>) {
+ my $chomped = chomp;
+ my $line = $_;
+ my @fields;
+ if (defined $csv) {
+ die "Unable to parse line $. of $file: ".$csv->error_diag() unless $csv->parse($_);
+ @fields = $csv->fields();
+ } else {
+ @fields = split /$options{delimiter}/o,$_;
+ }
+ # skip lines which don't have enough fields
+ if ($options{has_header} and not @header) {
+ @header = @fields;
+ @header{@header} = 0..$#fields;
+ print $line;
+ print $/ if $chomped;
+ next LINE;
+ }
+ if ($options{has_header} and not $headers_updated) {
+ $headers_updated = 1;
+ if (@header < @fields) {
+ @header{@header} = 1..@fields;
+ }
+ my @new_indexes;
+ for my $index (@field_indexes) {
+ push @new_indexes,$index and next if $index =~ /^-?\d+$/;
+ if (not exists $header{$index}) {
+ use Data::Dumper;
+ print STDERR Data::Dumper->Dump([\%header],[qw(*header)]);
+ print STDERR "Invalid header $index\n";
+ exit 1;
+ } else {
+ push @new_indexes,$header{$index};
+ }
+ }
+ @field_indexes = @new_indexes;
+ print STDERR Data::Dumper->Dump([\@field_indexes],[qw(*field_indexes)]) if $DEBUG;
+ }
+ next LINE if grep {not defined $_} @fields[@field_indexes];
+ my $i = -1;
+ REGEX: for my $regex (@{$options{regexp}}) {
+ $i++;
+ if (length $regex) {
+ my @fields_to_examine = map {$fields[$_]} @field_indexes;
+ if (@{$options{regexp}} > 1) {
+ @fields_to_examine = $fields_to_examine[$i];
+ }
+ FIELDS: for my $field (@fields_to_examine) {
+ if (not exists $compiled_regexes{$regex}) {
+ $compiled_regexes{$regex} = qr/$regex/;
+ }
+ print STDERR "regex: $regex field: $field\n" if $DEBUG;
+ $field =~ $compiled_regexes{$regex} and next REGEX;
+ }
+ next LINE;
+ }
+ }
+ print $line;
+ print $/ if $chomped;
}
- print $_;
- print $/ if $chomped;
}
-
__END__