From 2146fcf5353d75ed4c3a7da0a6c8fdee46ac91da Mon Sep 17 00:00:00 2001 From: martinahansen Date: Mon, 3 Nov 2008 08:07:35 +0000 Subject: [PATCH] rewrote grab to get speed git-svn-id: http://biopieces.googlecode.com/svn/trunk@292 74ccb610-7750-0410-82ae-013aeee3265d --- code_perl/Maasha/Biopieces.pm | 327 ++++++++++++++++++---------------- code_perl/Maasha/Seq.pm | 2 +- 2 files changed, 177 insertions(+), 152 deletions(-) diff --git a/code_perl/Maasha/Biopieces.pm b/code_perl/Maasha/Biopieces.pm index a284734..51821ac 100644 --- a/code_perl/Maasha/Biopieces.pm +++ b/code_perl/Maasha/Biopieces.pm @@ -5043,28 +5043,40 @@ sub script_grab # Returns nothing. - my ( $patterns, $pattern, $record, $key, $pos, $op, $val, %lookup_hash ); + my ( $keys, $vals_only, $keys_only, $invert, $patterns, $pattern, $regex, $record, $key, $op, $val, %lookup_hash, $found ); - if ( $options->{ "patterns" } ) + $keys = $options->{ 'keys' }; + $vals_only = $options->{ 'vals_only' }; + $keys_only = $options->{ 'keys_only' }; + $invert = $options->{ 'invert' }; + + if ( $options->{ 'patterns' } ) { - $patterns = [ split ",", $options->{ "patterns" } ]; + $patterns = [ split ",", $options->{ 'patterns' } ]; } - elsif ( -f $options->{ "patterns_in" } ) + elsif ( -f $options->{ 'patterns_in' } ) { - $patterns = Maasha::Patscan::read_patterns( $options->{ "patterns_in" } ); + $patterns = Maasha::Patscan::read_patterns( $options->{ 'patterns_in' } ); } - elsif ( -f $options->{ "exact_in" } ) + elsif ( $options->{ 'regex' } ) { - $patterns = Maasha::Patscan::read_patterns( $options->{ "exact_in" } ); + if ( $options->{ 'case_insensitive' } ) { + $regex = qr/$options->{ 'regex' }/i; + } else { + $regex = qr/$options->{ 'regex' }/; + } + } + elsif ( -f $options->{ 'exact_in' } ) + { + $patterns = Maasha::Patscan::read_patterns( $options->{ 'exact_in' } ); map { $lookup_hash{ $_ } = 1 } @{ $patterns }; undef $patterns; } - - if ( $options->{ "eval" } ) + elsif ( $options->{ 'eval' } ) { - if ( $options->{ "eval" } =~ /^([^><=! ]+)\s*(>=|<=|>|<|=|!=|eq|ne)\s*(.+)$/ ) + if ( $options->{ 'eval' } =~ /^([^><=! ]+)\s*(>=|<=|>|<|=|!=|eq|ne)\s*(.+)$/ ) { $key = $1; $op = $2; @@ -5074,151 +5086,21 @@ sub script_grab while ( $record = get_record( $in ) ) { - $pos = -1; - - if ( %lookup_hash ) - { - if ( $options->{ "keys" } ) - { - foreach $key ( @{ $options->{ "keys" } } ) - { - if ( exists $lookup_hash{ $record->{ $key } } ) - { - $pos = 1; - goto FOUND; - } - } - } - else - { - foreach $key ( keys %{ $record } ) - { - if ( not $options->{ "vals_only" } ) - { - if ( exists $lookup_hash{ $key } ) - { - $pos = 1; - goto FOUND; - } - } - - if ( not $options->{ "keys_only" } ) - { - if ( exists $lookup_hash{ $record->{ $key } } ) - { - $pos = 1; - goto FOUND; - } - } - } - } - } - elsif ( $patterns ) - { - foreach $pattern ( @{ $patterns } ) - { - if ( $options->{ "keys" } ) - { - foreach $key ( @{ $options->{ "keys" } } ) - { - $pos = index $record->{ $key }, $pattern; - - goto FOUND if $pos >= 0; - } - } - else - { - foreach $key ( keys %{ $record } ) - { - if ( not $options->{ "vals_only" } ) - { - $pos = index $key, $pattern; - - goto FOUND if $pos >= 0; - } - - if ( not $options->{ "keys_only" } ) - { - $pos = index $record->{ $key }, $pattern; - - goto FOUND if $pos >= 0; - } - } - } - } - } - elsif ( $options->{ "regex" } ) - { - if ( $options->{ "keys" } ) - { - foreach $key ( @{ $options->{ "keys" } } ) - { - if ( $options->{ "case_insensitive" } ) { - $pos = 1 if $record->{ $key } =~ /$options->{'regex'}/i; - } else { - $pos = 1 if $record->{ $key } =~ /$options->{'regex'}/; - } - - goto FOUND if $pos >= 0; - } - } - else - { - foreach $key ( keys %{ $record } ) - { - if ( not $options->{ "vals_only" } ) - { - if ( $options->{ "case_insensitive" } ) { - $pos = 1 if $key =~ /$options->{'regex'}/i; - } else { - $pos = 1 if $key =~ /$options->{'regex'}/; - } - - goto FOUND if $pos >= 0; - } + $found = 0; - if ( not $options->{ "keys_only" } ) - { - if ( $options->{ "case_insensitive" } ) { - $pos = 1 if $record->{ $key } =~ /$options->{'regex'}/i; - } else { - $pos = 1 if $record->{ $key } =~ /$options->{'regex'}/; - } - - goto FOUND if $pos >= 0; - } - } - } - } - elsif ( $options->{ "eval" } ) - { - if ( defined $record->{ $key } ) - { - if ( $op eq "<" and $record->{ $key } < $val ) { - $pos = 1 and goto FOUND; - } elsif ( $op eq ">" and $record->{ $key } > $val ) { - $pos = 1 and goto FOUND; - } elsif ( $op eq ">=" and $record->{ $key } >= $val ) { - $pos = 1 and goto FOUND; - } elsif ( $op eq "<=" and $record->{ $key } <= $val ) { - $pos = 1 and goto FOUND; - } elsif ( $op eq "=" and $record->{ $key } == $val ) { - $pos = 1 and goto FOUND; - } elsif ( $op eq "!=" and $record->{ $key } != $val ) { - $pos = 1 and goto FOUND; - } elsif ( $op eq "eq" and $record->{ $key } eq $val ) { - $pos = 1 and goto FOUND; - } elsif ( $op eq "ne" and $record->{ $key } ne $val ) { - $pos = 1 and goto FOUND; - } - } + if ( %lookup_hash ) { + $found = grab_lookup( \%lookup_hash, $record, $keys, $vals_only, $keys_only ); + } elsif ( $patterns ) { + $found = grab_patterns( $patterns, $record, $keys, $vals_only, $keys_only ); + } elsif ( $regex ) { + $found = grab_regex( $regex, $record, $keys, $vals_only, $keys_only ); + } elsif ( $op ) { + $found = grab_eval( $key, $op, $val, $record ); } - FOUND: - - if ( $pos >= 0 and not $options->{ "invert" } ) { + if ( $found and not $invert ) { put_record( $record, $out ); - } elsif ( $pos < 0 and $options->{ "invert" } ) { + } elsif ( not $found and $invert ) { put_record( $record, $out ); } } @@ -6245,6 +6127,149 @@ sub script_upload_to_ucsc # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +sub grab_lookup +{ + # Martin A. Hansen, November 2009. + + # Uses keys from a lookup hash to search records. Optionally, a list of + # keys can be given so the lookup is limited to these, also, flags + # can be given to limit lookup to keys or vals only. Returns 1 if lookup + # succeeded, else 0. + + my ( $lookup_hash, # hashref with patterns + $record, # hashref + $keys, # list of keys - OPTIONAL + $vals_only, # only vals flag - OPTIONAL + $keys_only, # only keys flag - OPTIONAL + ) = @_; + + # Returns boolean. + + if ( $keys ) + { + map { return 1 if exists $lookup_hash->{ $record->{ $_ } } } @{ $keys }; + } + else + { + if ( not $vals_only ) { + map { return 1 if exists $lookup_hash->{ $_ } } keys %{ $record }; + } + + if ( not $keys_only ) { + map { return 1 if exists $lookup_hash->{ $record->{ $_ } } } keys %{ $record }; + } + } + + return 0; +} + + +sub grab_patterns +{ + # Martin A. Hansen, November 2009. + + # Uses patterns to match records containing the pattern as a substring. + # Returns 1 if the record is matched, else 0. + + my ( $patterns, # list of patterns + $record, # hashref + $keys, # list of keys - OPTIONAL + $vals_only, # only vals flag - OPTIONAL + $keys_only, # only keys flag - OPTIONAL + ) = @_; + + # Returns boolean. + + my ( $pattern ); + + foreach $pattern ( @{ $patterns } ) + { + if ( $keys ) + { + map { return 1 if index( $record->{ $_ }, $pattern ) >= 0 } @{ $keys }; + } + else + { + if ( not $vals_only ) { + map { return 1 if index( $_, $pattern ) >= 0 } keys %{ $record }; + } + + if ( not $keys_only ) { + map { return 1 if index( $record->{ $_ }, $pattern ) >= 0 } keys %{ $record }; + } + } + } + + return 0; +} + + +sub grab_regex +{ + # Martin A. Hansen, November 2009. + + # Uses regex to match records. + # Returns 1 if the record is matched, else 0. + + my ( $regex, # regex to match + $record, # hashref + $keys, # list of keys - OPTIONAL + $vals_only, # only vals flag - OPTIONAL + $keys_only, # only keys flag - OPTIONAL + ) = @_; + + # Returns boolean. + + if ( $keys ) + { + map { return 1 if $record->{ $_ } =~ /$regex/ } @{ $keys }; + } + else + { + if ( not $vals_only ) { + map { return 1 if $_ =~ /$regex/ } keys %{ $record }; + } + + if ( not $keys_only ) { + map { return 1 if $record->{ $_ } =~ /$regex/ } keys %{ $record }; + } + } + + return 0; +} + + +sub grab_eval +{ + # Martin A. Hansen, November 2009. + + # Test if the value of a given record key evaluates according + # to a given operator. Returns 1 if eval is OK, else 0. + + my ( $key, # record key + $op, # operator + $val, # value + $record, # hashref + ) = @_; + + # Returns boolean. + + if ( defined $record->{ $key } ) + { + return 1 if ( $op eq "<" and $record->{ $key } < $val ); + return 1 if ( $op eq ">" and $record->{ $key } > $val ); + return 1 if ( $op eq ">=" and $record->{ $key } >= $val ); + return 1 if ( $op eq "<=" and $record->{ $key } <= $val ); + return 1 if ( $op eq "=" and $record->{ $key } == $val ); + return 1 if ( $op eq "!=" and $record->{ $key } != $val ); + return 1 if ( $op eq "eq" and $record->{ $key } eq $val ); + return 1 if ( $op eq "ne" and $record->{ $key } ne $val ); + } + + return 0; +} + + sub record2fasta { # Martin A. Hansen, July 2008. diff --git a/code_perl/Maasha/Seq.pm b/code_perl/Maasha/Seq.pm index 4f7cdb2..42df6d3 100644 --- a/code_perl/Maasha/Seq.pm +++ b/code_perl/Maasha/Seq.pm @@ -636,7 +636,7 @@ sub generate_dna_oligos # Generates all possible DNA oligos of a given wordsize. - # alternative way: perl -MData::Dumper -e '@CONV = glob( "{T,C,A,G}" x 4 ); print Dumper( \@CONV )' + # alternative way: perl -MData::Dumper -e '@CONV = glob( "{A,T,C,G}" x 4 ); print Dumper( \@CONV )' my ( $wordsize, # size of DNA oligos -- 2.39.5