From 388aa3a2b83ae1ec3c4ceb703f942ea80070f16f Mon Sep 17 00:00:00 2001 From: Steve Hancock Date: Fri, 18 Aug 2023 12:21:00 -0700 Subject: [PATCH] fix issue c250 part 2, new sub token type S --- lib/Perl/Tidy/Formatter.pm | 111 +++++++++++++++++++----------------- lib/Perl/Tidy/HtmlWriter.pm | 8 ++- lib/Perl/Tidy/Tokenizer.pm | 44 +++++++------- 3 files changed, 86 insertions(+), 77 deletions(-) diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm index ed0139a7..7708609c 100644 --- a/lib/Perl/Tidy/Formatter.pm +++ b/lib/Perl/Tidy/Formatter.pm @@ -2801,11 +2801,11 @@ sub initialize_whitespace_hashes { # simple as adding your new letter to @spaces_both_sides, for # example. - # fix for c250: added space rules new package type 'P' + # fix for c250: added space rules new package type 'P' and sub type 'S' my @spaces_both_sides = qw# + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -= .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~ - &&= ||= //= <=> A k f w F n C Y U G v P + &&= ||= //= <=> A k f w F n C Y U G v P S #; my @spaces_left_side = qw< @@ -4183,6 +4183,8 @@ EOM $right_bond_strength{'CORE::'} = NO_BREAK; # Fix for c250: added strengths for new type 'P' + # Note: these are working okay, but may eventually need to be + # adjusted or even removed. $left_bond_strength{'P'} = NOMINAL; $right_bond_strength{'P'} = NOMINAL; @@ -4460,6 +4462,10 @@ EOM $binary_bond_strength{'t'}{'L{'} = NO_BREAK; $binary_bond_strength{'i'}{'L{'} = NO_BREAK; + # Fix for c250: set strength for new 'S' to be same as 'i' + # testfile is test11/Hub.pm + $binary_bond_strength{'S'}{'L{'} = NO_BREAK; + # As a defensive measure, do not break between a '(' and a # filehandle. In some cases, this can cause an error. For # example, the following program works: @@ -8083,7 +8089,7 @@ sub dump_verbatim { my %wU; my %wiq; -my %is_witP; +my %is_witPS; my %is_sigil; my %is_nonlist_keyword; my %is_nonlist_type; @@ -8100,8 +8106,8 @@ BEGIN { @q = qw(w i q Q G C Z); @{wiq}{@q} = (1) x scalar(@q); - @q = qw(w i t P); # Fix for c250: added new type 'P', formerly 'i' - @{is_witP}{@q} = (1) x scalar(@q); + @q = qw(w i t P S); # Fix for c250: added new types 'P', 'S', formerly 'i' + @{is_witPS}{@q} = (1) x scalar(@q); @q = qw($ & % * @); @{is_sigil}{@q} = (1) x scalar(@q); @@ -8639,8 +8645,8 @@ sub respace_tokens_inner_loop { # Modify certain tokens here for whitespace # The following is not yet done, but could be: # sub (x x x) - # ( $type =~ /^[wit]$/ ) - elsif ( $is_witP{$type} ) { + # ( $type =~ /^[witPS]$/ ) + elsif ( $is_witPS{$type} ) { # index() is several times faster than a regex test with \s here ## $token =~ /\s/ @@ -8667,54 +8673,49 @@ sub respace_tokens_inner_loop { } } - # Trim certain spaces in identifiers - if ( $type eq 'i' ) { - - if ( $token =~ /$SUB_PATTERN/ ) { - - # -spp = 0 : no space before opening prototype paren - # -spp = 1 : stable (follow input spacing) - # -spp = 2 : always space before opening prototype paren - if ( !defined($rOpts_space_prototype_paren) - || $rOpts_space_prototype_paren == 1 ) - { - ## default: stable - } - elsif ( $rOpts_space_prototype_paren == 0 ) { - $token =~ s/\s+\(/\(/; - } - elsif ( $rOpts_space_prototype_paren == 2 ) { - $token =~ s/\(/ (/; - } + # trim identifiers of trailing blanks which can occur + # under some unusual circumstances, such as if the + # identifier 'witch' has trailing blanks on input here: + # + # sub + # witch + # () # prototype may be on new line ... + # ... + my $ord_ch = ord( substr( $token, -1, 1 ) ); + if ( - # one space max, and no tabs - $token =~ s/\s+/ /g; - $rtoken_vars->[_TOKEN_] = $token; + # quick check for possible ending space + $ord_ch > 0 && ( $ord_ch < ORD_PRINTABLE_MIN + || $ord_ch > ORD_PRINTABLE_MAX ) + ) + { + $token =~ s/\s+$//g; + $rtoken_vars->[_TOKEN_] = $token; + } - $self->[_ris_special_identifier_token_]->{$token} = - 'sub'; + # Fixed for c250 to use 'S' for sub definitions + if ( $type eq 'S' ) { + # -spp = 0 : no space before opening prototype paren + # -spp = 1 : stable (follow input spacing) + # -spp = 2 : always space before opening prototype paren + if ( !defined($rOpts_space_prototype_paren) + || $rOpts_space_prototype_paren == 1 ) + { + ## default: stable + } + elsif ( $rOpts_space_prototype_paren == 0 ) { + $token =~ s/\s+\(/\(/; + } + elsif ( $rOpts_space_prototype_paren == 2 ) { + $token =~ s/\(/ (/; } - # trim identifiers of trailing blanks which can occur - # under some unusual circumstances, such as if the - # identifier 'witch' has trailing blanks on input here: - # - # sub - # witch - # () # prototype may be on new line ... - # ... - my $ord_ch = ord( substr( $token, -1, 1 ) ); - if ( + # one space max, and no tabs + $token =~ s/\s+/ /g; + $rtoken_vars->[_TOKEN_] = $token; - # quick check for possible ending space - $ord_ch > 0 && ( $ord_ch < ORD_PRINTABLE_MIN - || $ord_ch > ORD_PRINTABLE_MAX ) - ) - { - $token =~ s/\s+$//g; - $rtoken_vars->[_TOKEN_] = $token; - } + $self->[_ris_special_identifier_token_]->{$token} = 'sub'; } # and trim spaces in package statements (added for c250) @@ -18121,8 +18122,8 @@ EOM } # blank lines before subs except declarations and one-liners - # Fix for c250: added new type 'P' - elsif ( $leading_type eq 'i' || $leading_type eq 'P' ) { + # Fix for c250: added new type 'P', changed 'i' to 'S' + elsif ( $leading_type eq 'S' || $leading_type eq 'P' ) { my $special_identifier = $self->[_ris_special_identifier_token_]->{$leading_token}; if ($special_identifier) { @@ -21709,6 +21710,11 @@ sub break_lines_inner_loop { # Do not separate an isolated bare word from an opening paren. # Alternate Fix #2 for issue b1299. This waits as long as possible # to make the decision. + # Note for fix #c250: to keep line breaks unchanged under -extrude when + # switching from 'i' to 'S' for subs, we would have to also check 'S', i.e. + # =~/^[Si]$/. But this was never necessary at a sub signature, so we leave + # it alone and allow the new version to be different for --extrude. For a + # test file run perl527/signatures.t with --extrude. if ( $types_to_go[$i_begin] eq 'i' && substr( $tokens_to_go[$i_begin], 0, 1 ) =~ /\w/ ) { @@ -22400,7 +22406,8 @@ sub do_colon_breaks { # always open comma lists not preceded by keywords, # barewords, identifiers (that is, anything that doesn't # look like a function call) - my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/; + # c250: added new sub identifier type 'S' + my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiUS]$/; $self->table_maker( { diff --git a/lib/Perl/Tidy/HtmlWriter.pm b/lib/Perl/Tidy/HtmlWriter.pm index 7c751c62..180c4aa1 100644 --- a/lib/Perl/Tidy/HtmlWriter.pm +++ b/lib/Perl/Tidy/HtmlWriter.pm @@ -370,6 +370,7 @@ BEGIN { # When adding NEW_TOKENS: update this hash table # $type => $short_name + # c250: changed 'M' to 'S' %token_short_names = ( '#' => 'c', 'n' => 'n', @@ -390,7 +391,7 @@ BEGIN { 'f' => 'sc', '(' => 'p', ')' => 'p', - 'M' => 'm', + 'S' => 'm', 'pd' => 'pd', 'A' => 'co', ); @@ -1321,12 +1322,13 @@ sub markup_tokens { # Intercept a sub name here; split it # into keyword 'sub' and sub name; and add an # entry in the toc + # Fix for c250: switch from 'i' to 'S' #------------------------------------------------------- - if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) { + if ( $type eq 'S' && $token =~ /^(\w+\s+)(\w.*)$/ ) { $token = $self->markup_html_element( $1, 'k' ); push @colored_tokens, $token; $token = $2; - $type = 'M'; + $type = 'S'; # but don't include sub declarations in the toc; # these will have leading token types 'i;' diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm index 24634f76..1a87fbbd 100644 --- a/lib/Perl/Tidy/Tokenizer.pm +++ b/lib/Perl/Tidy/Tokenizer.pm @@ -3104,7 +3104,7 @@ EOM # Added 'package' (can be 'class') for --use-feature=class (rt145706) if ( substr( $statement_type, 0, 3 ) eq 'sub' ) { $last_nonblank_token = $statement_type; - $last_nonblank_type = 'i'; + $last_nonblank_type = 'S'; # c250 change $statement_type = EMPTY_STRING; } elsif ( substr( $statement_type, 0, 7 ) eq 'package' ) { @@ -5281,7 +5281,7 @@ EOM # this pre-token will start an output token push( @{$routput_token_list}, $i_tok ); - # The search for the full token ends in one of 5 main END NODES + # The search for the full token ends in one of 5 main END NODES: #----------------------- # END NODE 1: whitespace @@ -5468,9 +5468,9 @@ EOM next; } - #----------------------------- - # END NODE 5: all other tokens - #----------------------------- + #------------------------------------------ + # END NODE 5: everything else (punctuation) + #------------------------------------------ my $code = $tokenization_code->{$tok}; if ($code) { $code->($self); @@ -5784,8 +5784,9 @@ BEGIN { # note: this is identical to '@value_requestor_type' defined later. # Fix for c250: add new type 'P' for package (expecting VERSION or {} # after package NAMESPACE, so expecting TERM) + # Fix for c250: add new type 'S' for sub (not expecting operator) my @q = qw( - ; ! + x & ? F J - p / Y : % f U ~ A G j L P * . | ^ < = [ m { \ > t + ; ! + x & ? F J - p / Y : % f U ~ A G j L P S * . | ^ < = [ m { \ > t || >= != mm *= => .. !~ == && |= .= pp -= =~ += <= %= ^= x= ~~ ** << /= &= // >> ~. &. |. ^. ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~ @@ -5805,7 +5806,7 @@ BEGIN { # 'i' is currently excluded because it might be a package # 'q' is currently excluded because it might be a prototype # Fix for c030: removed '->' from this list: - # Fix for c250: added 'i' after new type 'P' added + # Fix for c250: added 'i' because new type 'P' was added @q = qw( -- C h R ++ ] Q <> i ); ## n v q ); push @q, ')'; @{op_expected_table}{@q} = (OPERATOR) x scalar(@q); @@ -5902,7 +5903,8 @@ sub operator_expected { # Types 'n', 'v', 'q' also depend on context. # identifier... - # Fix for c250: type 'i' and new type 'P' are in the hash table now + # Fix for c250: removed coding for type 'i' because 'i' and new type 'P' + # are now done by hash table lookup # keyword... if ( $last_nonblank_type eq 'k' ) { @@ -6260,21 +6262,19 @@ sub code_block_type { } # or a sub or package BLOCK - # Fixed for c250 to include new package type 'P' - # FIXME: this could use optimization + # Fixed for c250 to include new package type 'P', and change 'i' to 'S' elsif ( - ( - $last_nonblank_type eq 'i' - || $last_nonblank_type eq 't' - || $last_nonblank_type eq 'P' - ) - && $last_nonblank_token =~ /^(sub|package)\b/ + $last_nonblank_type eq 'P' + || $last_nonblank_type eq 'S' + || ( $last_nonblank_type eq 't' + && substr( $last_nonblank_token, 0, 3 ) eq 'sub' ) ) { return $last_nonblank_token; } # or a sub alias + # FIXME: see if this is really needed after the c250 update elsif (( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' ) && ( $is_sub{$last_nonblank_token} ) ) { @@ -8670,7 +8670,7 @@ EOM my $pos = pos($input_line); my $numc = $pos - $pos_beg; $tok = 'sub ' . substr( $input_line, $pos_beg, $numc ); - $type = 'i'; + $type = 'S'; ## Fix for c250, was 'i'; # remember the sub name in case another call is needed to # get the prototype @@ -8724,7 +8724,7 @@ EOM # Patch part #1 to fixes cases b994 and b1053: # Mark an anonymous sub keyword without prototype as type 'k', i.e. # 'sub : lvalue { ...' - $type = 'i'; + $type = 'S'; ## C250, was 'i'; if ( $tok eq 'sub' && !$proto ) { $type = 'k' } } @@ -10075,8 +10075,8 @@ The following additional token types are defined: C user-defined constant or constant function (with void prototype = ()) U user-defined function taking parameters G user-defined function taking block parameter (like grep/map/eval) - M (unused, but reserved for subroutine definition name) - P package definition + S sub definition (reported as type 'i' in older versions) + P package definition (reported as type 'i' in older versions) t type indicater such as %,$,@,*,&,sub w bare word (perhaps a subroutine call) i identifier of some type (with leading %, $, @, *, &, sub, -> ) @@ -10142,9 +10142,9 @@ BEGIN { # make a hash of all valid token types for self-checking the tokenizer # (adding NEW_TOKENS : select a new character and add to this list) - # fix for c250: added new token type 'P' + # fix for c250: added new token type 'P' and 'S' my @valid_token_types = qw# - A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v P + A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v P S { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ & #; push( @valid_token_types, @digraphs ); -- 2.39.5