From 7580f507e3b8d2f76f262274153606a1024a8de8 Mon Sep 17 00:00:00 2001 From: Steve Hancock Date: Thu, 5 Sep 2024 14:29:53 -0700 Subject: [PATCH] simplify code for guessing slash type --- lib/Perl/Tidy/Tokenizer.pm | 459 +++++++++++++++++-------------------- 1 file changed, 208 insertions(+), 251 deletions(-) diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm index 7ee5ada0..24a8fb5d 100644 --- a/lib/Perl/Tidy/Tokenizer.pm +++ b/lib/Perl/Tidy/Tokenizer.pm @@ -7571,81 +7571,79 @@ sub guess_if_pattern_or_conditional { my ( $self, $i, $rtokens, $rtoken_type, $rtoken_map_uu, $max_token_index ) = @_; my $is_pattern = 0; - my $msg = "guessing that ? after $last_nonblank_token starts a "; + my $msg = "guessing that ? after '$last_nonblank_token' starts a "; if ( $i >= $max_token_index ) { $msg .= "conditional (no end to pattern found on the line)\n"; + $is_pattern = 0; + return ( $is_pattern, $msg ); } - else { - my $ibeg = $i; - $i = $ibeg + 1; - ##my $next_token = $rtokens->[$i]; # first token after ? - - # look for a possible ending ? on this line.. - my $in_quote = 1; - my $quote_depth = 0; - my $quote_character = EMPTY_STRING; - my $quote_pos = 0; - my $quoted_string; - ( - $i, - $in_quote, - $quote_character, - $quote_pos, - $quote_depth, - $quoted_string, + my $ibeg = $i; + $i = $ibeg + 1; + ##my $next_token = $rtokens->[$i]; # first token after ? + + # look for a possible ending ? on this line.. + my $in_quote = 1; + my $quote_depth = 0; + my $quote_character = EMPTY_STRING; + my $quote_pos = 0; + my $quoted_string; + ( - ) = $self->follow_quoted_string( + $i, + $in_quote, + $quote_character, + $quote_pos, + $quote_depth, + $quoted_string, - $ibeg, - $in_quote, - $rtokens, - $rtoken_type, - $quote_character, - $quote_pos, - $quote_depth, - $max_token_index, + ) = $self->follow_quoted_string( - ); + $ibeg, + $in_quote, + $rtokens, + $rtoken_type, + $quote_character, + $quote_pos, + $quote_depth, + $max_token_index, - if ($in_quote) { + ); - # we didn't find an ending ? on this line, - # so we bias towards conditional - $is_pattern = 0; - $msg .= "conditional (no ending ? on this line)\n"; + if ($in_quote) { - # we found an ending ?, so we bias towards a pattern - } - else { + # we didn't find an ending ? on this line, + # so we bias towards conditional + $is_pattern = 0; + $msg .= "conditional (no ending ? on this line)\n"; + return ( $is_pattern, $msg ); + } - # Watch out for an ending ? in quotes, like this - # my $case_flag = File::Spec->case_tolerant ? '(?i)' : ''; - my $s_quote = 0; - my $d_quote = 0; - my $colons = 0; - foreach my $ii ( $ibeg + 1 .. $i - 1 ) { - my $tok = $rtokens->[$ii]; - if ( $tok eq ":" ) { $colons++ } - if ( $tok eq "'" ) { $s_quote++ } - if ( $tok eq '"' ) { $d_quote++ } - } - if ( $s_quote % 2 || $d_quote % 2 || $colons ) { - $is_pattern = 0; - $msg .= "found ending ? but unbalanced quote chars\n"; - } - elsif ( - $self->pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) - { - $is_pattern = 1; - $msg .= "pattern (found ending ? and pattern expected)\n"; - } - else { - $msg .= "pattern (uncertain, but found ending ?)\n"; - } - } + # we found an ending ?, so we bias towards a pattern + + # Watch out for an ending ? in quotes, like this + # my $case_flag = File::Spec->case_tolerant ? '(?i)' : ''; + my $s_quote = 0; + my $d_quote = 0; + my $colons = 0; + foreach my $ii ( $ibeg + 1 .. $i - 1 ) { + my $tok = $rtokens->[$ii]; + if ( $tok eq ":" ) { $colons++ } + if ( $tok eq "'" ) { $s_quote++ } + if ( $tok eq '"' ) { $d_quote++ } + } + if ( $s_quote % 2 || $d_quote % 2 || $colons ) { + $is_pattern = 0; + $msg .= "found ending ? but unbalanced quote chars\n"; + return ( $is_pattern, $msg ); } + if ( $self->pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) { + $is_pattern = 1; + $msg .= "pattern (found ending ? and pattern expected)\n"; + return ( $is_pattern, $msg ); + } + $msg .= "pattern (uncertain, but found ending ?)\n"; return ( $is_pattern, $msg ); } ## end sub guess_if_pattern_or_conditional @@ -7665,211 +7663,147 @@ BEGIN { sub guess_if_pattern_or_division { - # this routine is called when we have encountered a / following an + # This routine is called when we have encountered a / following an # unknown bareword, and we must decide if it starts a pattern or is a - # division + # division. # input parameters: # $i - token index of the / starting possible pattern # output parameters: # $is_pattern = 0 if probably division, =1 if probably a pattern # msg = a warning or diagnostic message # USES GLOBAL VARIABLES: $last_nonblank_token - my ( $self, $i, $rtokens, $rtoken_type, $rtoken_map_uu, $max_token_index ) - = @_; - my $msg = -"guessing that / after $last_nonblank_token type '$last_nonblank_type' starts a "; - my $ibeg = $i; - - # use info collected for barewords to help decide - my ( $function_count, $constant_count ) = - $self->get_bareword_counts($last_nonblank_token); + my ( $self, $i, $rtokens, $rtoken_type, $rtoken_map, $max_token_index ) = + @_; + my $msg = "guessing that / after '$last_nonblank_token' starts a "; + my $ibeg = $i; my $is_pattern = 0; - # anything more on this line? - if ( $i >= $max_token_index ) { - if ($function_count) { - $is_pattern = 1; - $msg .= -"pattern (no end to pattern found on the line, but function count=$function_count)\n"; - } - else { - $is_pattern = 0; - $msg .= -"division (no end to pattern found on the line, function count=0)\n"; - } - } - else { - my $divide_possible = - $self->is_possible_numerator( $i, $rtokens, $max_token_index ); - - if ( $divide_possible < 0 ) { - $msg = "pattern (division not possible here)\n"; - $is_pattern = 1; - $self->saw_bareword_function($last_nonblank_token); - return ( $is_pattern, $msg ); - } - - $i = $ibeg + 1; - my $next_token = $rtokens->[$i]; # first token after slash + my $divide_possible = + $self->is_possible_numerator( $i, $rtokens, $max_token_index ); - # One of the things we can look at is the spacing around the slash. - # There # are four possible spacings around the first slash: - # - # return pi/two;#/; -/- - # return pi/ two;#/; -/+ - # return pi / two;#/; +/+ - # return pi /two;#/; +/- <-- possible pattern - # - # Spacing rule: a space before the slash but not after the slash - # usually indicates a pattern. We can use this to break ties. - # Note: perl seems to take a newline as a space in this rule (c243) - my $space_before = $i < 2 || $rtokens->[ $i - 2 ] =~ m/^\s/; - my $space_after = $next_token =~ m/^\s/; - my $is_pattern_by_spacing = $space_before && !$space_after; - - # look for a possible ending / on this line.. - my $in_quote = 1; - my $quote_depth = 0; - my $quote_character = EMPTY_STRING; - my $quote_pos = 0; - my $quoted_string; - ( - - $i, - $in_quote, - $quote_character, - $quote_pos, - $quote_depth, - $quoted_string - ) - = $self->follow_quoted_string( - - $ibeg, - $in_quote, - $rtokens, - $rtoken_type, - $quote_character, - $quote_pos, - $quote_depth, - $max_token_index - ); + if ( $divide_possible < 0 ) { + $msg .= "pattern (division not possible here)\n"; + $is_pattern = 1; + $self->saw_bareword_function($last_nonblank_token); + return ( $is_pattern, $msg ); + } + if ( $divide_possible == 4 ) { + $msg .= "division (pattern not possible here)\n"; + $is_pattern = 0; + return ( $is_pattern, $msg ); + } - if ($in_quote) { + # anything left on line? + if ( $i >= $max_token_index ) { + $msg .= "division (line ends with this /)\n"; + $is_pattern = 0; + return ( $is_pattern, $msg ); + } - # we didn't find an ending / on this line - # first use any bareword info - if ($function_count) { - $msg .= -"pattern - no ending / on this line but have function count=$function_count\n"; - $is_pattern = 1; - } - elsif ( $divide_possible >= 0 ) { - $is_pattern = 0; - $constant_count = 0 unless defined($constant_count); - $msg .= -"division (no ending / on this line, function count=0, constant count=$constant_count)\n"; - } - else { - # assuming a multi-line pattern ... this is risky, but division - # does not seem possible. If this fails, it would either be due - # to a syntax error in the code, or the division_expected logic - # needs to be fixed. - $msg = "multi-line pattern (division not possible)\n"; - $is_pattern = 1; - } - } + # quick check for no pattern-ending slash on this line + my $pos_beg = $rtoken_map->[$ibeg]; + my $input_line = $self->[_line_of_text_]; + if ( index( $input_line, '/', $pos_beg + 1 ) < 0 ) { + $msg .= "division (no ending / on this line)\n"; + $is_pattern = 0; + return ( $is_pattern, $msg ); + } - # we found an ending /, it might terminate a pattern - else { + # Setup spacing rule before we change $i below.. + $i = $ibeg + 1; + my $next_token = $rtokens->[$i]; # first token after slash - my $pattern_expected = - $self->pattern_expected( $i, $rtokens, $max_token_index ); + # There are four possible spacings around the first slash: + # + # return pi/two;#/; -/- + # return pi/ two;#/; -/+ + # return pi / two;#/; +/+ + # return pi /two;#/; +/- <-- possible pattern + # + # Spacing rule: a space before the slash but not after the slash + # usually indicates a pattern. We can use this to break ties. + # Note: perl seems to take a newline as a space in this rule (c243) + my $space_before = $i < 2 || $rtokens->[ $i - 2 ] =~ m/^\s/; + my $space_after = $next_token =~ m/^\s/; + my $is_pattern_by_spacing = $space_before && !$space_after; + + # Make an accurate search for a possible terminating / on this line.. + my $in_quote = 1; + my $quote_depth = 0; + my $quote_character = EMPTY_STRING; + my $quote_pos = 0; + my $quoted_string; + ( - if ( $pattern_expected >= 0 ) { + $i, + $in_quote, + $quote_character, + $quote_pos, + $quote_depth, + $quoted_string + ) + = $self->follow_quoted_string( - # pattern looks possible... - if ( $divide_possible >= 0 ) { + $ibeg, + $in_quote, + $rtokens, + $rtoken_type, + $quote_character, + $quote_pos, + $quote_depth, + $max_token_index + ); - # Both pattern and divide can work here... + # if we didn't find an ending / on this line .. + if ($in_quote) { + $is_pattern = 0; + $msg .= "division (no ending / on this line)\n"; + return ( $is_pattern, $msg ); + } - # Increase weight of divide if a pure number follows - $divide_possible += $next_token =~ /^\d+$/; + # we found an ending /, see if it might terminate a pattern + my $pattern_expected = + $self->pattern_expected( $i, $rtokens, $max_token_index ); - # start with any bareword info - if ($function_count) { - $msg .= -"pattern - division works too but have function count=$function_count\n"; - $is_pattern = 1; - } - elsif ($constant_count) { - $msg .= -"constant - division works too but have constant count=$constant_count\n"; - $is_pattern = 0; - } + if ( $pattern_expected < 0 ) { + $is_pattern = 0; + $msg .= "division (pattern not possible)\n"; + return ( $is_pattern, $msg ); + } - # Check for known constants in the numerator, like 'pi' - elsif ( $is_known_constant{$last_nonblank_token} ) { - $msg .= + # Both pattern and divide can work here... + # Check for known constants in the numerator, like 'pi' + if ( $is_known_constant{$last_nonblank_token} ) { + $msg .= "division (pattern works too but saw known constant '$last_nonblank_token')\n"; - $is_pattern = 0; - } - - # A very common bare word in pattern expressions is 'ok' - elsif ( $is_known_function{$last_nonblank_token} ) { - $msg .= -"pattern (division works too but saw '$last_nonblank_token')\n"; - $is_pattern = 1; - } - - # If one rule is more definite, use it - elsif ( $divide_possible > $pattern_expected ) { - $msg .= - "division (more likely based on following tokens)\n"; - $is_pattern = 0; - } - - # otherwise, use the spacing rule - elsif ($is_pattern_by_spacing) { - $msg .= -"pattern (guess on spacing, but division possible too)\n"; - $is_pattern = 1; - } - else { - $msg .= -"division (guess on spacing, but pattern is possible too)\n"; - $is_pattern = 0; - } - } - - # divide_possible < 0 means divide can not work here - else { - $is_pattern = 1; - $msg .= "pattern (division not possible)\n"; - } - } + $is_pattern = 0; + return ( $is_pattern, $msg ); + } - # pattern does not look possible... - else { + # Check for known functions like 'ok' + if ( $is_known_function{$last_nonblank_token} ) { + $msg .= "pattern (division works too but saw '$last_nonblank_token')\n"; + $is_pattern = 1; + return ( $is_pattern, $msg ); + } - if ( $divide_possible >= 0 ) { - $is_pattern = 0; - $msg .= "division (pattern not possible)\n"; - } + # If one rule is more probable, use it + if ( $divide_possible > $pattern_expected ) { + $msg .= "division (more likely based on following tokens)\n"; + $is_pattern = 0; + return ( $is_pattern, $msg ); + } - # Neither pattern nor divide look possible...go by spacing - else { - if ($is_pattern_by_spacing) { - $msg .= "pattern (guess on spacing)\n"; - $is_pattern = 1; - } - else { - $msg .= "division (guess on spacing)\n"; - $is_pattern = 0; - } - } - } - } + # finally, we have to use the spacing rule + if ($is_pattern_by_spacing) { + $msg .= "pattern (guess on spacing, but division possible too)\n"; + $is_pattern = 1; + } + else { + $msg .= "division (guess on spacing, but pattern is possible too)\n"; + $is_pattern = 0; } + return ( $is_pattern, $msg ); } ## end sub guess_if_pattern_or_division @@ -9808,13 +9742,16 @@ sub find_next_noncomment_token { sub is_possible_numerator { # Look at the next non-comment character and decide if it could be a - # numerator. Return - # 1 - yes - # 0 - can't tell - # -1 - no + # numerator. Returns the following code: + # -1 - division not possible + # 0 - can't tell if division possible + # 1 - division possible + # 2 - division probable: number follows + # 3 - division very probable: number and one of ; ] } follow + # 4 - is division, not partern: number and ) follow my ( $self, $i, $rtokens, $max_token_index ) = @_; - my $is_possible_numerator = 0; + my $divide_possible_code = 0; my $next_token = $rtokens->[ $i + 1 ]; if ( $next_token eq '=' ) { $i++; } # handle /= @@ -9828,16 +9765,36 @@ sub is_possible_numerator { } if ( $next_nonblank_token =~ / [ \( \$ \w \. \@ ] /x ) { - $is_possible_numerator = 1; + $divide_possible_code = 1; + + # look ahead one more token for some common patterns, such as + # pi/2) pi/2; pi/2} + if ( $next_nonblank_token =~ /^\d/ ) { + my ( $next_next_nonblank_token, $i_next_next_uu ) = + $self->find_next_nonblank_token( $i_next, $rtokens, + $max_token_index ); + if ( $next_next_nonblank_token eq ')' ) { + $divide_possible_code = 4; + } + elsif ($next_next_nonblank_token eq ';' + || $next_next_nonblank_token eq ']' + || $next_next_nonblank_token eq '}' ) + { + $divide_possible_code = 3; + } + else { + $divide_possible_code = 2; + } + } } elsif ( $next_nonblank_token =~ /^\s*$/ ) { - $is_possible_numerator = 0; + $divide_possible_code = 0; } else { - $is_possible_numerator = -1; + $divide_possible_code = -1; } - return $is_possible_numerator; + return $divide_possible_code; } ## end sub is_possible_numerator { ## closure for sub pattern_expected -- 2.39.5