From 4652fc2f70c456250b07a18d26068357fae718a7 Mon Sep 17 00:00:00 2001 From: Steve Hancock Date: Wed, 21 Oct 2020 17:06:37 -0700 Subject: [PATCH] speedup sub operator_expected about 40% with hash lookup --- lib/Perl/Tidy/Tokenizer.pm | 50 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm index cd6d1d96..76aa47eb 100644 --- a/lib/Perl/Tidy/Tokenizer.pm +++ b/lib/Perl/Tidy/Tokenizer.pm @@ -4383,6 +4383,37 @@ EOM # Tokenizer routines which assist in identifying token types ####################################################################### +# hash to speed up sub operator_expected +my %quick_op_expected; + +BEGIN { + + # Always expecting TERM following these types: + my @q = qw( + ; ! + x & ? F J - p / Y : % f U ~ A G j L * . | ^ < = [ m { \ > + || >= != mm *= => .. !~ == && |= .= pp -= =~ += <= %= ^= x= ~~ ** << /= &= // >> + ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~ + ); + push @q, ','; + @{quick_op_expected}{@q} = (TERM) x scalar(@q); + + # Always UNKNOWN following these types: [for completeness, nothing here] + # Note that type 'w' is almost always UNKNOWN but not always. + @q = qw( ); + @{quick_op_expected}{@q} = (UNKNOWN) x scalar(@q); + + # Always expecting OPERATOR following these types: + @q = qw( -- C -> h R ++ ] ); + push @q, ')'; + @{quick_op_expected}{@q} = (OPERATOR) x scalar(@q); + + # Mixed: expectation depends on additional context: + # We will have to execute the full sub for these: + @q = qw( k } t n i Z <> q Q v w ); + @{quick_op_expected}{@q} = (undef) x scalar(@q); + +} + sub operator_expected { # Many perl symbols have two or more meanings. For example, '<<' @@ -4422,6 +4453,18 @@ sub operator_expected { my ( $prev_type, $tok, $next_type ) = @_; use constant DEBUG_EXPECT => 0; + # Optional shortcut which covers about 50% of cases and reduces run time + # of this sub by about 40%. To verify correctness, run with -I on numerous + # test files and check for 'ERROR' in the resulting DIAGNOSTICS file. + my $op_expected_quick = $quick_op_expected{$last_nonblank_type}; + if ( defined($op_expected_quick) ) { + return $op_expected_quick + unless defined( $tokenizer_self->[_diagnostics_object_] ); + + # in -I mode we continue and compare results with the full sub + # just before the return below. + } + my $op_expected = UNKNOWN; ##print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n"; @@ -4647,6 +4690,13 @@ sub operator_expected { ); } + if ( defined($op_expected_quick) && $op_expected_quick != $op_expected ) { + write_diagnostics(<