X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Fgrab;h=b034f2de42ca047a93550cecf9bcaab9f2d457c2;hb=48bea5c28b89dc5586d0bddb338ccd6ba23aa1f9;hp=fdf5bd287f78613d2aea83bb9b15f855cad57e77;hpb=e8e32d43d674573ff4a66fe77780cc18cb02d430;p=biopieces.git diff --git a/bp_bin/grab b/bp_bin/grab index fdf5bd2..b034f2d 100755 --- a/bp_bin/grab +++ b/bp_bin/grab @@ -1,6 +1,316 @@ #!/usr/bin/env perl +# Copyright (C) 2007-2009 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Grab records in stream. + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + use warnings; use strict; +use Data::Dumper; +use Maasha::Biopieces; +use Maasha::Common; +use Maasha::Patscan; + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +my ( $options, $in, $out, $record, $keys, $vals_only, $keys_only, $invert, + $patterns, $regex, %lookup_hash, $key, $op, $val, $found, $total, $grabbed ); + +$options = Maasha::Biopieces::parse_options( + [ + { long => 'patterns', short => 'p', type => 'list', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'patterns_in', short => 'P', type => 'file!', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'regex', short => 'r', type => 'string', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'eval', short => 'e', type => 'string', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'exact_in', short => 'E', type => 'file!', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'invert', short => 'i', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'case_insensitive', short => 'c', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'keys', short => 'k', type => 'list', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'keys_only', short => 'K', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'vals_only', short => 'V', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + ] +); + +$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); +$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); + +$keys = $options->{ 'keys' }; +$vals_only = $options->{ 'vals_only' }; +$keys_only = $options->{ 'keys_only' }; +$invert = $options->{ 'invert' }; + +if ( $options->{ 'patterns' } ) +{ + $patterns = $options->{ 'patterns' }; +} +elsif ( defined $options->{ 'patterns_in' } and -f $options->{ 'patterns_in' } ) +{ + $patterns = Maasha::Patscan::read_patterns( $options->{ 'patterns_in' } ); +} +elsif ( $options->{ 'regex' } ) +{ + if ( $options->{ 'case_insensitive' } ) { + $regex = qr/$options->{ 'regex' }/i; + } else { + $regex = qr/$options->{ 'regex' }/; + } +} +elsif ( defined $options->{ 'exact_in' } and -f $options->{ 'exact_in' } ) +{ + $patterns = Maasha::Patscan::read_patterns( $options->{ 'exact_in' } ); + + map { $lookup_hash{ $_ } = 1 } @{ $patterns }; + + undef $patterns; +} +elsif ( $options->{ 'eval' } ) +{ + if ( $options->{ 'eval' } =~ /^([^><=! ]+)\s*(>=|<=|>|<|==|=|!=|eq|ne)\s*(.+)$/ ) + { + $key = $1; + $op = $2; + $val = $3; + } + else + { + Maasha::Common::error( qq(Bad eval string: $options->{ 'eval' }) ); + } +} + +$total = 0; +$grabbed = 0; + +while ( $record = Maasha::Biopieces::get_record( $in ) ) +{ + $found = 0; + + if ( %lookup_hash ) { + $found = grab_lookup( \%lookup_hash, $record, $keys, $vals_only, $keys_only ); + } elsif ( $patterns ) { + $found = grab_patterns( $patterns, $record, $keys, $vals_only, $keys_only ); + } elsif ( $regex ) { + $found = grab_regex( $regex, $record, $keys, $vals_only, $keys_only ); + } elsif ( $op ) { + $found = grab_eval( $key, $op, $val, $record ); + } + + if ( $found and not $invert ) + { + Maasha::Biopieces::put_record( $record, $out ); + $grabbed += 1; + } + elsif ( not $found and $invert ) + { + Maasha::Biopieces::put_record( $record, $out ); + $grabbed += 1; + } + + $total += 1; +} + +Maasha::Biopieces::close_stream( $in ); +Maasha::Biopieces::close_stream( $out ); + +if ( $options->{ 'verbose' } ) +{ + print STDERR "Records grabbed: $grabbed\n"; + print STDERR "Records missed: " . ( $total - $grabbed ) . "\n"; + print STDERR "Patterns used: " . ( scalar @{$patterns} ) . "\n" if defined $patterns; + print STDERR "Patterns used: " . ( scalar keys %lookup_hash ) . "\n" if %lookup_hash; +} + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +sub grab_lookup +{ + # Martin A. Hansen, November 2009. + + # Uses keys from a lookup hash to search records. Optionally, a list of + # keys can be given so the lookup is limited to these, also, flags + # can be given to limit lookup to keys or vals only. Returns 1 if lookup + # succeeded, else 0. + + my ( $lookup_hash, # hashref with patterns + $record, # hashref + $keys, # list of keys - OPTIONAL + $vals_only, # only vals flag - OPTIONAL + $keys_only, # only keys flag - OPTIONAL + ) = @_; + + # Returns boolean. + + if ( $keys ) + { + map { return 1 if defined $record->{ $_ } and exists $lookup_hash->{ $record->{ $_ } } } @{ $keys }; + } + else + { + if ( not $vals_only ) { + map { return 1 if exists $lookup_hash->{ $_ } } keys %{ $record }; + } + + if ( not $keys_only ) { + map { return 1 if defined $record->{ $_ } and exists $lookup_hash->{ $record->{ $_ } } } keys %{ $record }; + } + } + + return 0; +} + + +sub grab_patterns +{ + # Martin A. Hansen, November 2009. + + # Uses patterns to match records containing the pattern as a substring. + # Returns 1 if the record is matched, else 0. + + my ( $patterns, # list of patterns + $record, # hashref + $keys, # list of keys - OPTIONAL + $vals_only, # only vals flag - OPTIONAL + $keys_only, # only keys flag - OPTIONAL + ) = @_; + + # Returns boolean. + + my ( $pattern, $key ); + + foreach $pattern ( @{ $patterns } ) + { + if ( $keys ) + { + foreach $key ( @{ $keys } ) + { + return 0 if not exists $record->{ $key }; + return 1 if index( $record->{ $key }, $pattern ) >= 0; + } + } + else + { + if ( not $vals_only ) { + map { return 1 if index( $_, $pattern ) >= 0 } keys %{ $record }; + } + + if ( not $keys_only ) { + map { return 1 if index( $record->{ $_ }, $pattern ) >= 0 } keys %{ $record }; + } + } + } + + return 0; +} + + +sub grab_regex +{ + # Martin A. Hansen, November 2009. + + # Uses regex to match records. + # Returns 1 if the record is matched, else 0. + + my ( $regex, # regex to match + $record, # hashref + $keys, # list of keys - OPTIONAL + $vals_only, # only vals flag - OPTIONAL + $keys_only, # only keys flag - OPTIONAL + ) = @_; + + # Returns boolean. + + if ( $keys ) + { + map { return 1 if exists $record->{ $_ } and $record->{ $_ } =~ /$regex/ } @{ $keys }; + } + else + { + if ( not $vals_only ) { + map { return 1 if $_ =~ /$regex/ } keys %{ $record }; + } + + if ( not $keys_only ) { + map { return 1 if $record->{ $_ } =~ /$regex/ } keys %{ $record }; + } + } + + return 0; +} + + +sub grab_eval +{ + # Martin A. Hansen, November 2009. + + # Test if the value of a given record key evaluates according + # to a given operator. Returns 1 if eval is OK, else 0. + + my ( $key, # record key + $op, # operator + $val, # value + $record, # hashref + ) = @_; + + # Returns boolean. + + if ( defined $record->{ $key } ) + { + return 1 if ( $op eq "<" and $record->{ $key } < $val ); + return 1 if ( $op eq ">" and $record->{ $key } > $val ); + return 1 if ( $op eq ">=" and $record->{ $key } >= $val ); + return 1 if ( $op eq "<=" and $record->{ $key } <= $val ); + return 1 if ( $op eq "=" and $record->{ $key } == $val ); + return 1 if ( $op eq "==" and $record->{ $key } == $val ); + return 1 if ( $op eq "!=" and $record->{ $key } != $val ); + return 1 if ( $op eq "eq" and $record->{ $key } eq $val ); + return 1 if ( $op eq "ne" and $record->{ $key } ne $val ); + } + + return 0; +} + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +BEGIN +{ + Maasha::Biopieces::status_set(); +} + + +END +{ + Maasha::Biopieces::status_log(); +} + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + -use Maasha::BioRun; +__END__