From: Steve Hancock Date: Fri, 9 Jul 2021 18:52:20 +0000 (-0700) Subject: Improve logic for distinguishing a pattern vs a division X-Git-Tag: 20210625.02~1 X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=ffe4351b8b560e6ed67ca1105a96cfccc8c4fa05;p=perltidy.git Improve logic for distinguishing a pattern vs a division --- diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm index d19f7a74..c82a7d5c 100644 --- a/lib/Perl/Tidy/Tokenizer.pm +++ b/lib/Perl/Tidy/Tokenizer.pm @@ -2324,11 +2324,11 @@ EOM $type = $tok; } - #DEBUG - collecting info on what tokens follow a divide - # for development of guessing algorithm - #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) { - # #write_diagnostics( "DIVIDE? $input_line\n" ); - #} + #DEBUG - collecting info on what tokens follow a divide + # for development of guessing algorithm + #if ( is_possible_numerator( $i, $rtokens, $max_token_index ) < 0 ) { + # #write_diagnostics( "DIVIDE? $input_line\n" ); + #} } }, '{' => sub { @@ -6061,8 +6061,15 @@ sub guess_if_pattern_or_division { } else { my $ibeg = $i; - my $divide_expected = - numerator_expected( $i, $rtokens, $max_token_index ); + my $divide_possible = + is_possible_numerator( $i, $rtokens, $max_token_index ); + + if ( $divide_possible < 0 ) { + $msg = "pattern (division not possible here)\n"; + $is_pattern = 1; + goto RETURN; + } + $i = $ibeg + 1; my $next_token = $rtokens->[$i]; # first token after slash @@ -6097,7 +6104,7 @@ sub guess_if_pattern_or_division { # we didn't find an ending / on this line, so we bias towards # division - if ( $divide_expected >= 0 ) { + if ( $divide_possible >= 0 ) { $is_pattern = 0; $msg .= "division (no ending / on this line)\n"; } @@ -6121,12 +6128,12 @@ sub guess_if_pattern_or_division { if ( $pattern_expected >= 0 ) { # pattern looks possible... - if ( $divide_expected >= 0 ) { + if ( $divide_possible >= 0 ) { # Both pattern and divide can work here... 
# Increase weight of divide if a pure number follows - $divide_expected += $next_token =~ /^\d+$/; + $divide_possible += $next_token =~ /^\d+$/; # Check for known constants in the numerator, like 'pi' if ( $is_known_constant{$last_nonblank_token} ) { @@ -6143,7 +6150,7 @@ sub guess_if_pattern_or_division { } # If one rule is more definite, use it - elsif ( $divide_expected > $pattern_expected ) { + elsif ( $divide_possible > $pattern_expected ) { $msg .= "division (more likely based on following tokens)\n"; $is_pattern = 0; @@ -6162,7 +6169,7 @@ sub guess_if_pattern_or_division { } } - # divide_expected < 0 means divide can not work here + # divide_possible < 0 means divide can not work here else { $is_pattern = 1; $msg .= "pattern (division not possible)\n"; @@ -6172,7 +6179,7 @@ sub guess_if_pattern_or_division { # pattern does not look possible... else { - if ( $divide_expected >= 0 ) { + if ( $divide_possible >= 0 ) { $is_pattern = 0; $msg .= "division (pattern not possible)\n"; } @@ -6191,6 +6198,8 @@ sub guess_if_pattern_or_division { } } } + + RETURN: return ( $is_pattern, $msg ); } @@ -7681,37 +7690,39 @@ sub find_next_nonblank_token { return ( $next_nonblank_token, $i ); } -sub numerator_expected { +sub is_possible_numerator { - # this is a filter for a possible numerator, in support of guessing - # for the / pattern delimiter token. - # returns - + # Look at the next non-comment character and decide if it could be a + # numerator. Return # 1 - yes # 0 - can't tell # -1 - no - # Note: I am using the convention that variables ending in - # _expected have these 3 possible values. 
+ my ( $i, $rtokens, $max_token_index ) = @_; - my $numerator_expected = 0; + my $is_possible_numerator = 0; my $next_token = $rtokens->[ $i + 1 ]; if ( $next_token eq '=' ) { $i++; } # handle /= my ( $next_nonblank_token, $i_next ) = find_next_nonblank_token( $i, $rtokens, $max_token_index ); + if ( $next_nonblank_token eq '#' ) { + ( $next_nonblank_token, $i_next ) = + find_next_nonblank_token( $max_token_index, $rtokens, + $max_token_index ); + } + if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) { - $numerator_expected = 1; + $is_possible_numerator = 1; + } + elsif ( $next_nonblank_token =~ /^\s*$/ ) { + $is_possible_numerator = 0; } else { - - if ( $next_nonblank_token =~ /^\s*$/ ) { - $numerator_expected = 0; - } - else { - $numerator_expected = -1; - } + $is_possible_numerator = -1; } - return $numerator_expected; + + return $is_possible_numerator; } { ## closure for sub pattern_expected diff --git a/local-docs/BugLog.pod b/local-docs/BugLog.pod index 54e9858f..757e5815 100644 --- a/local-docs/BugLog.pod +++ b/local-docs/BugLog.pod @@ -2,6 +2,31 @@ =over 4 +=item B + +Testing with side comments produced the following snippet which caused an error +due to the side comment on the '/' + + $bond_str + = + VERY_WEAK #sc# + / #sc# + 1.05 + ; + +Several related examples were found in which side comments triggered errors. +For example + + ok + /[^\s]+/#sc# + , + 'm/[^\s]/ utf8' + ; + +This update fixes these problems, case c040. + +9 Jul 2021. + =item B Testing with side comments produced an incorrect error message for this