From: Steve Hancock Date: Thu, 21 Apr 2022 01:20:06 +0000 (-0700) Subject: fix minor tokenization issue b1337 X-Git-Tag: 20220613~55 X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=dfc97a6c835a8b19386c7ad0aa56af721648951c;p=perltidy.git fix minor tokenization issue b1337 --- diff --git a/dev-bin/run_convergence_tests.pl.data b/dev-bin/run_convergence_tests.pl.data index 01d978e8..260f108f 100644 --- a/dev-bin/run_convergence_tests.pl.data +++ b/dev-bin/run_convergence_tests.pl.data @@ -9729,6 +9729,36 @@ params => --indent-columns=0 --maximum-line-length=96 +==> b1337.in <== +# S1: 'new' was given type 'i' here + print JSON::PP->new + ->indent + ->space_after + ->canonical + ->convert_blessed + ->encode( + $self->{abbrev} + ? $schema + ->abbreviated + : $schema ); + +# S2: 'new' was given type 'w' here, but type 'i' is better + print JSON::PP-> + new->indent + ->space_after + ->canonical + ->convert_blessed + ->encode( + $self->{abbrev} + ? $schema + ->abbreviated + : $schema ); + +==> b1337.par <== +--ignore-old-breakpoints +--maximum-line-length=19 +--variable-maximum-line-length + ==> b140.in <== $cmd[ $i ]=[ $s, $e, $cmd, \@hunk, $i ] ; diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm index 0a27052a..3b67d57a 100644 --- a/lib/Perl/Tidy/Formatter.pm +++ b/lib/Perl/Tidy/Formatter.pm @@ -2545,8 +2545,12 @@ sub set_whitespace_flags { my %is_for_foreach = ( 'for' => 1, 'foreach' => 1 ); - my ( $rtokh, $token, $type ); - my ( $rtokh_last, $last_token, $last_type ); + my ( $rtokh, $token, $type ); + my $rtokh_last = $rLL->[0]; + my $rtokh_last_last = $rtokh_last; + + my $last_type = ''; + my $last_token = ''; my $j_tight_closing_paren = -1; @@ -2658,6 +2662,8 @@ sub set_whitespace_flags { next; } + $rtokh_last_last = $rtokh_last; + $rtokh_last = $rtokh; $last_token = $token; $last_type = $type; @@ -2709,19 +2715,19 @@ sub set_whitespace_flags { } else { $tightness = $tightness{$last_token} } - #============================================================= - # Patch for test problem <> - # We must always avoid spaces around a bare word beginning - # with ^ as in: - # my $before = ${^PREMATCH}; - # Because all of the following cause an error in perl: - # my $before = ${ ^PREMATCH }; - # my $before = ${ ^PREMATCH}; - # my $before = ${^PREMATCH }; - # So if brace tightness flag is -bt=0 we must temporarily reset - # to bt=1. Note that here we must set tightness=1 and not 2 so - # that the closing space - # is also avoided (via the $j_tight_closing_paren flag in coding) + #============================================================= + # Patch for test problem <> + # We must always avoid spaces around a bare word beginning + # with ^ as in: + # my $before = ${^PREMATCH}; + # Because all of the following cause an error in perl: + # my $before = ${ ^PREMATCH }; + # my $before = ${ ^PREMATCH}; + # my $before = ${^PREMATCH }; + # So if brace tightness flag is -bt=0 we must temporarily reset + # to bt=1. Note that here we must set tightness=1 and not 2 so + # that the closing space is also avoided + # (via the $j_tight_closing_paren flag in coding) if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 } #============================================================= @@ -2799,21 +2805,21 @@ sub set_whitespace_flags { my $seqno = $rtokh->[_TYPE_SEQUENCE_]; - # This will have to be tweaked as tokenization changes. - # We usually want a space at '} (', for example: - # <> - # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s ); - # - # But not others: - # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } ); - # At present, the above & block is marked as type L/R so this case - # won't go through here. + # This will have to be tweaked as tokenization changes. + # We usually want a space at '} (', for example: + # <> + # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s ); + # + # But not others: + # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } ); + # At present, the above & block is marked as type L/R so this + # case won't go through here. if ( $last_type eq '}' && $last_token ne ')' ) { $ws = WS_YES } - # NOTE: some older versions of Perl had occasional problems if - # spaces are introduced between keywords or functions and opening - # parens. So the default is not to do this except is certain - # cases. The current Perl seems to tolerate spaces. + # NOTE: some older versions of Perl had occasional problems if + # spaces are introduced between keywords or functions and + # opening parens. So the default is not to do this except is + # certain cases. The current Perl seems to tolerate spaces. # Space between keyword and '(' elsif ( $last_type eq 'k' ) { @@ -2831,18 +2837,35 @@ sub set_whitespace_flags { # myfun( &myfun( ->myfun( # ----------------------------------------------------- - # Note that at this point an identifier may still have a leading - # arrow, but the arrow will be split off during token respacing. - # After that, the token may become a bare word without leading - # arrow. The point is, it is best to mark function call parens - # right here before that happens. - # Patch: added 'C' to prevent blinker, case b934, i.e. 'pi()' - # NOTE: this would be the place to allow spaces between repeated - # parens, like () () (), as in case c017, but I decided that would - # not be a good idea. + # Note that at this point an identifier may still have a + # leading arrow, but the arrow will be split off during token + # respacing. After that, the token may become a bare word + # without leading arrow. The point is, it is best to mark + # function call parens right here before that happens. + # Patch: added 'C' to prevent blinker, case b934, i.e. 'pi()' + # NOTE: this would be the place to allow spaces between + # repeated parens, like () () (), as in case c017, but I + # decided that would not be a good idea. elsif ( - ( $last_type =~ /^[wCUG]$/ ) - || ( $last_type =~ /^[wi]$/ && $last_token =~ /^([\&]|->)/ ) + $last_type =~ /^[wCUG]$/ + || ( + $last_type =~ /^[wi]$/ + + && ( + $last_token =~ /^([\&]|->)/ + + # or -> or & split from bareword by newline (b1337) + || ( + $last_token =~ /^\w/ + && ( + $rtokh_last_last->[_TYPE_] eq '->' + || ( $rtokh_last_last->[_TYPE_] eq 't' + && $rtokh_last_last->[_TOKEN_] =~ + /^\&\s*$/ ) + ) + ) + ) + ) ) { $ws = $rOpts_space_function_paren ? WS_YES : WS_NO; @@ -2850,10 +2873,10 @@ sub set_whitespace_flags { $ris_function_call_paren->{$seqno} = 1; } - # space between something like $i and ( in <> - # for $i ( 0 .. 20 ) { - # FIXME: eventually, type 'i' could be split into multiple - # token types so this can be a hardwired rule. + # space between something like $i and ( in 'snippets/space2.in' + # for $i ( 0 .. 20 ) { + # FIXME: eventually, type 'i' could be split into multiple + # token types so this can be a hardwired rule. elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) { $ws = WS_YES; } @@ -2996,24 +3019,24 @@ sub set_whitespace_flags { # Apply default rules not covered above. #--------------------------------------------------------------- - # If we fall through to here, look at the pre-defined hash tables for - # the two tokens, and: - # if (they are equal) use the common value - # if (either is zero or undef) use the other - # if (either is -1) use it - # That is, - # left vs right - # 1 vs 1 --> 1 - # 0 vs 0 --> 0 - # -1 vs -1 --> -1 - # - # 0 vs -1 --> -1 - # 0 vs 1 --> 1 - # 1 vs 0 --> 1 - # -1 vs 0 --> -1 - # - # -1 vs 1 --> -1 - # 1 vs -1 --> -1 + # If we fall through to here, look at the pre-defined hash tables + # for the two tokens, and: + # if (they are equal) use the common value + # if (either is zero or undef) use the other + # if (either is -1) use it + # That is, + # left vs right + # 1 vs 1 --> 1 + # 0 vs 0 --> 0 + # -1 vs -1 --> -1 + # + # 0 vs -1 --> -1 + # 0 vs 1 --> 1 + # 1 vs 0 --> 1 + # -1 vs 0 --> -1 + # + # -1 vs 1 --> -1 + # 1 vs -1 --> -1 if ( !defined($ws) ) { my $wl = $want_left_space{$type}; my $wr = $want_right_space{$last_type}; diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm index 51900045..6a060b62 100644 --- a/lib/Perl/Tidy/Tokenizer.pm +++ b/lib/Perl/Tidy/Tokenizer.pm @@ -7736,6 +7736,12 @@ sub scan_identifier_do { # type 'w' includes anything without leading type info # ($,%,@,*) including something like abc::def::ghi $type = 'w'; + + # Fix for b1337, if restarting scan after line break between '->' or + # sigil and identifier name, use type 'i' + if ( $id_scan_state_begin && $identifier =~ /^([\$\%\@\*\&]|->)/ ) { + $type = 'i'; + } } else { $type = '';