#!/usr/bin/env perl # Copyright (C) 2007-2009 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # http://www.gnu.org/copyleft/gpl.html # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # Grab records in stream. # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< use warnings; use strict; use Data::Dumper; use Maasha::Biopieces; use Maasha::Common; use Maasha::Patscan; # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< my ( $options, $in, $out, $record, $keys, $vals_only, $keys_only, $invert, $patterns, $regex, %lookup_hash, $key, $op, $val, $found, $total, $grabbed ); $options = Maasha::Biopieces::parse_options( [ { long => 'patterns', short => 'p', type => 'list', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, { long => 'patterns_in', short => 'P', type => 'file!', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, { long => 'regex', short => 'r', type => 'string', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, { long => 'eval', short => 'e', type => 'string', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, { long => 'exact_in', short => 'E', type => 'file!', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, { long => 'invert', short => 'i', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, { long => 'case_insensitive', short => 'c', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, { long => 'keys', short => 'k', type => 'list', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, { long => 'keys_only', short => 'K', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, { long => 'vals_only', short => 'V', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, ] ); $in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); $out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); $keys = $options->{ 'keys' }; $vals_only = $options->{ 'vals_only' }; $keys_only = $options->{ 'keys_only' }; $invert = $options->{ 'invert' }; if ( $options->{ 'patterns' } ) { $patterns = $options->{ 'patterns' }; } elsif ( defined $options->{ 'patterns_in' } and -f $options->{ 'patterns_in' } ) { $patterns = Maasha::Patscan::read_patterns( $options->{ 'patterns_in' } ); } elsif ( $options->{ 'regex' } ) { if ( $options->{ 'case_insensitive' } ) { $regex = qr/$options->{ 'regex' }/i; } else { $regex = qr/$options->{ 'regex' }/; } } elsif ( defined $options->{ 'exact_in' } and -f $options->{ 'exact_in' } ) { $patterns = Maasha::Patscan::read_patterns( $options->{ 'exact_in' } ); map { $lookup_hash{ $_ } = 1 } @{ $patterns }; undef $patterns; } elsif ( $options->{ 'eval' } ) { if ( $options->{ 'eval' } =~ /^([^><=! ]+)\s*(>=|<=|>|<|==|=|!=|eq|ne)\s*(.+)$/ ) { $key = $1; $op = $2; $val = $3; } else { Maasha::Common::error( qq(Bad eval string: $options->{ 'eval' }) ); } } $total = 0; $grabbed = 0; while ( $record = Maasha::Biopieces::get_record( $in ) ) { $found = 0; if ( %lookup_hash ) { $found = grab_lookup( \%lookup_hash, $record, $keys, $vals_only, $keys_only ); } elsif ( $patterns ) { $found = grab_patterns( $patterns, $record, $keys, $vals_only, $keys_only ); } elsif ( $regex ) { $found = grab_regex( $regex, $record, $keys, $vals_only, $keys_only ); } elsif ( $op ) { $found = grab_eval( $key, $op, $val, $record ); } if ( $found and not $invert ) { Maasha::Biopieces::put_record( $record, $out ); $grabbed += 1; } elsif ( not $found and $invert ) { Maasha::Biopieces::put_record( $record, $out ); $grabbed += 1; } $total += 1; } Maasha::Biopieces::close_stream( $in ); Maasha::Biopieces::close_stream( $out ); if ( $options->{ 'verbose' } ) { print STDERR "Records grabbed: $grabbed\n"; print STDERR "Records missed: " . ( $total - $grabbed ) . "\n"; print STDERR "Patterns used: " . ( scalar @{$patterns} ) . "\n" if defined $patterns; print STDERR "Patterns used: " . ( scalar keys %lookup_hash ) . "\n" if %lookup_hash; } # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< sub grab_lookup { # Martin A. Hansen, November 2009. # Uses keys from a lookup hash to search records. Optionally, a list of # keys can be given so the lookup is limited to these, also, flags # can be given to limit lookup to keys or vals only. Returns 1 if lookup # succeeded, else 0. my ( $lookup_hash, # hashref with patterns $record, # hashref $keys, # list of keys - OPTIONAL $vals_only, # only vals flag - OPTIONAL $keys_only, # only keys flag - OPTIONAL ) = @_; # Returns boolean. if ( $keys ) { map { return 1 if defined $record->{ $_ } and exists $lookup_hash->{ $record->{ $_ } } } @{ $keys }; } else { if ( not $vals_only ) { map { return 1 if exists $lookup_hash->{ $_ } } keys %{ $record }; } if ( not $keys_only ) { map { return 1 if defined $record->{ $_ } and exists $lookup_hash->{ $record->{ $_ } } } keys %{ $record }; } } return 0; } sub grab_patterns { # Martin A. Hansen, November 2009. # Uses patterns to match records containing the pattern as a substring. # Returns 1 if the record is matched, else 0. my ( $patterns, # list of patterns $record, # hashref $keys, # list of keys - OPTIONAL $vals_only, # only vals flag - OPTIONAL $keys_only, # only keys flag - OPTIONAL ) = @_; # Returns boolean. my ( $pattern, $key ); foreach $pattern ( @{ $patterns } ) { if ( $keys ) { foreach $key ( @{ $keys } ) { return 0 if not exists $record->{ $key }; return 1 if index( $record->{ $key }, $pattern ) >= 0; } } else { if ( not $vals_only ) { map { return 1 if index( $_, $pattern ) >= 0 } keys %{ $record }; } if ( not $keys_only ) { map { return 1 if index( $record->{ $_ }, $pattern ) >= 0 } keys %{ $record }; } } } return 0; } sub grab_regex { # Martin A. Hansen, November 2009. # Uses regex to match records. # Returns 1 if the record is matched, else 0. my ( $regex, # regex to match $record, # hashref $keys, # list of keys - OPTIONAL $vals_only, # only vals flag - OPTIONAL $keys_only, # only keys flag - OPTIONAL ) = @_; # Returns boolean. if ( $keys ) { map { return 1 if exists $record->{ $_ } and $record->{ $_ } =~ /$regex/ } @{ $keys }; } else { if ( not $vals_only ) { map { return 1 if $_ =~ /$regex/ } keys %{ $record }; } if ( not $keys_only ) { map { return 1 if $record->{ $_ } =~ /$regex/ } keys %{ $record }; } } return 0; } sub grab_eval { # Martin A. Hansen, November 2009. # Test if the value of a given record key evaluates according # to a given operator. Returns 1 if eval is OK, else 0. my ( $key, # record key $op, # operator $val, # value $record, # hashref ) = @_; # Returns boolean. if ( defined $record->{ $key } ) { return 1 if ( $op eq "<" and $record->{ $key } < $val ); return 1 if ( $op eq ">" and $record->{ $key } > $val ); return 1 if ( $op eq ">=" and $record->{ $key } >= $val ); return 1 if ( $op eq "<=" and $record->{ $key } <= $val ); return 1 if ( $op eq "=" and $record->{ $key } == $val ); return 1 if ( $op eq "==" and $record->{ $key } == $val ); return 1 if ( $op eq "!=" and $record->{ $key } != $val ); return 1 if ( $op eq "eq" and $record->{ $key } eq $val ); return 1 if ( $op eq "ne" and $record->{ $key } ne $val ); } return 0; } # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< BEGIN { Maasha::Biopieces::status_set(); } END { Maasha::Biopieces::status_log(); } # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< __END__