fix issue c250 part 2, new sub token type S

author Steve Hancock <perltidy@users.sourceforge.net>

Fri, 18 Aug 2023 19:21:00 +0000 (12:21 -0700)

committer Steve Hancock <perltidy@users.sourceforge.net>

Fri, 18 Aug 2023 19:21:00 +0000 (12:21 -0700)
author Steve Hancock <perltidy@users.sourceforge.net>
Fri, 18 Aug 2023 19:21:00 +0000 (12:21 -0700)
committer Steve Hancock <perltidy@users.sourceforge.net>
Fri, 18 Aug 2023 19:21:00 +0000 (12:21 -0700)
diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm

index ed0139a7d1383c4dec1c2df6c90f8b287b1b344e..7708609c083e2ead6e31c5cd3ef3ebd230a9d946 100644 (file)
--- a/lib/Perl/Tidy/Formatter.pm
+++ b/lib/Perl/Tidy/Formatter.pm
@@ -2801,11 +2801,11 @@ sub initialize_whitespace_hashes {
      # simple as adding your new letter to @spaces_both_sides, for
      # example.
  
-    # fix for c250: added space rules new package type 'P'
+    # fix for c250: added space rules new package type 'P' and sub type 'S'
      my @spaces_both_sides = qw#
        + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
        .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
-      &&= ||= //= <=> A k f w F n C Y U G v P
+      &&= ||= //= <=> A k f w F n C Y U G v P S
        #;
  
      my @spaces_left_side = qw<
@@ -4183,6 +4183,8 @@ EOM
          $right_bond_strength{'CORE::'} = NO_BREAK;
  
          # Fix for c250: added strengths for new type 'P'
+        # Note: these are working okay, but may eventually need to be
+        # adjusted or even removed.
          $left_bond_strength{'P'}  = NOMINAL;
          $right_bond_strength{'P'} = NOMINAL;
  
@@ -4460,6 +4462,10 @@ EOM
          $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
          $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
  
+        # Fix for c250: set strength for new 'S' to be same as 'i'
+        # testfile is test11/Hub.pm
+        $binary_bond_strength{'S'}{'L{'} = NO_BREAK;
+
          # As a defensive measure, do not break between a '(' and a
          # filehandle.  In some cases, this can cause an error.  For
          # example, the following program works:
@@ -8083,7 +8089,7 @@ sub dump_verbatim {
  
  my %wU;
  my %wiq;
-my %is_witP;
+my %is_witPS;
  my %is_sigil;
  my %is_nonlist_keyword;
  my %is_nonlist_type;
@@ -8100,8 +8106,8 @@ BEGIN {
      @q = qw(w i q Q G C Z);
      @{wiq}{@q} = (1) x scalar(@q);
  
-    @q = qw(w i t P);    # Fix for c250: added new type 'P', formerly 'i'
-    @{is_witP}{@q} = (1) x scalar(@q);
+    @q = qw(w i t P S);   # Fix for c250: added new types 'P', 'S', formerly 'i'
+    @{is_witPS}{@q} = (1) x scalar(@q);
  
      @q = qw($ & % * @);
      @{is_sigil}{@q} = (1) x scalar(@q);
@@ -8639,8 +8645,8 @@ sub respace_tokens_inner_loop {
          # Modify certain tokens here for whitespace
          # The following is not yet done, but could be:
          #   sub (x x x)
-        #     ( $type =~ /^[wit]$/ )
-        elsif ( $is_witP{$type} ) {
+        #     ( $type =~ /^[witPS]$/ )
+        elsif ( $is_witPS{$type} ) {
  
              # index() is several times faster than a regex test with \s here
              ##   $token =~ /\s/
@@ -8667,54 +8673,49 @@ sub respace_tokens_inner_loop {
                      }
                  }
  
-                # Trim certain spaces in identifiers
-                if ( $type eq 'i' ) {
-
-                    if ( $token =~ /$SUB_PATTERN/ ) {
-
-                        # -spp = 0 : no space before opening prototype paren
-                        # -spp = 1 : stable (follow input spacing)
-                        # -spp = 2 : always space before opening prototype paren
-                        if ( !defined($rOpts_space_prototype_paren)
-                            || $rOpts_space_prototype_paren == 1 )
-                        {
-                            ## default: stable
-                        }
-                        elsif ( $rOpts_space_prototype_paren == 0 ) {
-                            $token =~ s/\s+\(/\(/;
-                        }
-                        elsif ( $rOpts_space_prototype_paren == 2 ) {
-                            $token =~ s/\(/ (/;
-                        }
+                # trim identifiers of trailing blanks which can occur
+                # under some unusual circumstances, such as if the
+                # identifier 'witch' has trailing blanks on input here:
+                #
+                # sub
+                # witch
+                # ()   # prototype may be on new line ...
+                # ...
+                my $ord_ch = ord( substr( $token, -1, 1 ) );
+                if (
  
-                        # one space max, and no tabs
-                        $token =~ s/\s+/ /g;
-                        $rtoken_vars->[_TOKEN_] = $token;
+                    # quick check for possible ending space
+                    $ord_ch > 0 && ( $ord_ch < ORD_PRINTABLE_MIN
+                        || $ord_ch > ORD_PRINTABLE_MAX )
+                  )
+                {
+                    $token =~ s/\s+$//g;
+                    $rtoken_vars->[_TOKEN_] = $token;
+                }
  
-                        $self->[_ris_special_identifier_token_]->{$token} =
-                          'sub';
+                # Fixed for c250 to use 'S' for sub definitions
+                if ( $type eq 'S' ) {
  
+                    # -spp = 0 : no space before opening prototype paren
+                    # -spp = 1 : stable (follow input spacing)
+                    # -spp = 2 : always space before opening prototype paren
+                    if ( !defined($rOpts_space_prototype_paren)
+                        || $rOpts_space_prototype_paren == 1 )
+                    {
+                        ## default: stable
+                    }
+                    elsif ( $rOpts_space_prototype_paren == 0 ) {
+                        $token =~ s/\s+\(/\(/;
+                    }
+                    elsif ( $rOpts_space_prototype_paren == 2 ) {
+                        $token =~ s/\(/ (/;
                      }
  
-                    # trim identifiers of trailing blanks which can occur
-                    # under some unusual circumstances, such as if the
-                    # identifier 'witch' has trailing blanks on input here:
-                    #
-                    # sub
-                    # witch
-                    # ()   # prototype may be on new line ...
-                    # ...
-                    my $ord_ch = ord( substr( $token, -1, 1 ) );
-                    if (
+                    # one space max, and no tabs
+                    $token =~ s/\s+/ /g;
+                    $rtoken_vars->[_TOKEN_] = $token;
  
-                        # quick check for possible ending space
-                        $ord_ch > 0 && ( $ord_ch < ORD_PRINTABLE_MIN
-                            || $ord_ch > ORD_PRINTABLE_MAX )
-                      )
-                    {
-                        $token =~ s/\s+$//g;
-                        $rtoken_vars->[_TOKEN_] = $token;
-                    }
+                    $self->[_ris_special_identifier_token_]->{$token} = 'sub';
                  }
  
                  # and trim spaces in package statements (added for c250)
@@ -18121,8 +18122,8 @@ EOM
              }
  
              # blank lines before subs except declarations and one-liners
-            # Fix for c250: added new type 'P'
-            elsif ( $leading_type eq 'i' || $leading_type eq 'P' ) {
+            # Fix for c250: added new type 'P', changed 'i' to 'S'
+            elsif ( $leading_type eq 'S' || $leading_type eq 'P' ) {
                  my $special_identifier =
                    $self->[_ris_special_identifier_token_]->{$leading_token};
                  if ($special_identifier) {
@@ -21709,6 +21710,11 @@ sub break_lines_inner_loop {
      # Do not separate an isolated bare word from an opening paren.
      # Alternate Fix #2 for issue b1299.  This waits as long as possible
      # to make the decision.
+    # Note for fix #c250: to keep line breaks unchanged under -extrude when
+    # switching from 'i' to 'S' for subs, we would have to also check 'S', i.e.
+    # =~/^[Si]$/.  But this was never necessary at a sub signature, so we leave
+    # it alone and allow the new version to be different for --extrude. For a
+    # test file run perl527/signatures.t with --extrude.
      if ( $types_to_go[$i_begin] eq 'i'
          && substr( $tokens_to_go[$i_begin], 0, 1 ) =~ /\w/ )
      {
@@ -22400,7 +22406,8 @@ sub do_colon_breaks {
                  # always open comma lists not preceded by keywords,
                  # barewords, identifiers (that is, anything that doesn't
                  # look like a function call)
-                my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
+                # c250: added new sub identifier type 'S'
+                my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiUS]$/;
  
                  $self->table_maker(
                      {
diff --git a/lib/Perl/Tidy/HtmlWriter.pm b/lib/Perl/Tidy/HtmlWriter.pm

index 7c751c62745b27e2fb4db2fe9e941cedd6fb681f..180c4aa128ec739d0cbf09d7b1fd90ef66e4fd17 100644 (file)
--- a/lib/Perl/Tidy/HtmlWriter.pm
+++ b/lib/Perl/Tidy/HtmlWriter.pm
@@ -370,6 +370,7 @@ BEGIN {
  
      # When adding NEW_TOKENS: update this hash table
      # $type => $short_name
+    # c250: changed 'M' to 'S'
      %token_short_names = (
          '#'  => 'c',
          'n'  => 'n',
@@ -390,7 +391,7 @@ BEGIN {
          'f'  => 'sc',
          '('  => 'p',
          ')'  => 'p',
-        'M'  => 'm',
+        'S'  => 'm',
          'pd' => 'pd',
          'A'  => 'co',
      );
@@ -1321,12 +1322,13 @@ sub markup_tokens {
          # Intercept a sub name here; split it
          # into keyword 'sub' and sub name; and add an
          # entry in the toc
+        # Fix for c250: switch from 'i' to 'S'
          #-------------------------------------------------------
-        if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) {
+        if ( $type eq 'S' && $token =~ /^(\w+\s+)(\w.*)$/ ) {
              $token = $self->markup_html_element( $1, 'k' );
              push @colored_tokens, $token;
              $token = $2;
-            $type  = 'M';
+            $type  = 'S';
  
              # but don't include sub declarations in the toc;
              # these will have leading token types 'i;'
diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm

index 24634f76a931eb5e116d849e46e814af5b376b74..1a87fbbdde259eb03739c190418828513652771f 100644 (file)
--- a/lib/Perl/Tidy/Tokenizer.pm
+++ b/lib/Perl/Tidy/Tokenizer.pm
@@ -3104,7 +3104,7 @@ EOM
          # Added 'package' (can be 'class') for --use-feature=class (rt145706)
          if ( substr( $statement_type, 0, 3 ) eq 'sub' ) {
              $last_nonblank_token = $statement_type;
-            $last_nonblank_type  = 'i';
+            $last_nonblank_type  = 'S';               # c250 change
              $statement_type      = EMPTY_STRING;
          }
          elsif ( substr( $statement_type, 0, 7 ) eq 'package' ) {
@@ -5281,7 +5281,7 @@ EOM
              # this pre-token will start an output token
              push( @{$routput_token_list}, $i_tok );
  
-            # The search for the full token ends in one of 5 main END NODES
+            # The search for the full token ends in one of 5 main END NODES:
  
              #-----------------------
              # END NODE 1: whitespace
@@ -5468,9 +5468,9 @@ EOM
                  next;
              }
  
-            #-----------------------------
-            # END NODE 5: all other tokens
-            #-----------------------------
+            #------------------------------------------
+            # END NODE 5: everything else (punctuation)
+            #------------------------------------------
              my $code = $tokenization_code->{$tok};
              if ($code) {
                  $code->($self);
@@ -5784,8 +5784,9 @@ BEGIN {
      # note: this is identical to '@value_requestor_type' defined later.
      # Fix for c250: add new type 'P' for package (expecting VERSION or {}
      # after package NAMESPACE, so expecting TERM)
+    # Fix for c250: add new type 'S' for sub (not expecting operator)
      my @q = qw(
-      ; ! + x & ?  F J - p / Y : % f U ~ A G j L P * . | ^ < = [ m { \ > t
+      ; ! + x & ?  F J - p / Y : % f U ~ A G j L P S * . | ^ < = [ m { \ > t
        || >= != mm *= => .. !~ == && |= .= pp -= =~ += <= %= ^= x= ~~ ** << /=
        &= // >> ~. &. |. ^.
        ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~
@@ -5805,7 +5806,7 @@ BEGIN {
      # 'i' is currently excluded because it might be a package
      # 'q' is currently excluded because it might be a prototype
      # Fix for c030: removed '->' from this list:
-    # Fix for c250: added 'i' after new type 'P' added
+    # Fix for c250: added 'i' because new type 'P' was added
      @q = qw( -- C h R ++ ] Q <> i );    ## n v q );
      push @q, ')';
      @{op_expected_table}{@q} = (OPERATOR) x scalar(@q);
@@ -5902,7 +5903,8 @@ sub operator_expected {
      # Types 'n', 'v', 'q' also depend on context.
  
      # identifier...
-    # Fix for c250: type 'i' and new type 'P' are in the hash table now
+    # Fix for c250: removed coding for type 'i' because 'i' and new type 'P'
+    # are now done by hash table lookup
  
      # keyword...
      if ( $last_nonblank_type eq 'k' ) {
@@ -6260,21 +6262,19 @@ sub code_block_type {
      }
  
      # or a sub or package BLOCK
-    # Fixed for c250 to include new package type 'P'
-    # FIXME: this could use optimization
+    # Fixed for c250 to include new package type 'P', and change 'i' to 'S'
      elsif (
-        (
-               $last_nonblank_type eq 'i'
-            || $last_nonblank_type eq 't'
-            || $last_nonblank_type eq 'P'
-        )
-        && $last_nonblank_token =~ /^(sub|package)\b/
+           $last_nonblank_type eq 'P'
+        || $last_nonblank_type eq 'S'
+        || ( $last_nonblank_type eq 't'
+            && substr( $last_nonblank_token, 0, 3 ) eq 'sub' )
        )
      {
          return $last_nonblank_token;
      }
  
      # or a sub alias
+    # FIXME: see if this is really needed after the c250 update
      elsif (( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
          && ( $is_sub{$last_nonblank_token} ) )
      {
@@ -8670,7 +8670,7 @@ EOM
              my $pos  = pos($input_line);
              my $numc = $pos - $pos_beg;
              $tok  = 'sub ' . substr( $input_line, $pos_beg, $numc );
-            $type = 'i';
+            $type = 'S';    ## Fix for c250, was 'i';
  
              # remember the sub name in case another call is needed to
              # get the prototype
@@ -8724,7 +8724,7 @@ EOM
              # Patch part #1 to fixes cases b994 and b1053:
              # Mark an anonymous sub keyword without prototype as type 'k', i.e.
              #    'sub : lvalue { ...'
-            $type = 'i';
+            $type = 'S';    ## C250, was 'i';
              if ( $tok eq 'sub' && !$proto ) { $type = 'k' }
          }
  
@@ -10075,8 +10075,8 @@ The following additional token types are defined:
      C    user-defined constant or constant function (with void prototype = ())
      U    user-defined function taking parameters
      G    user-defined function taking block parameter (like grep/map/eval)
-    M    (unused, but reserved for subroutine definition name)
-    P    package definition
+    S    sub definition     (reported as type 'i' in older versions)
+    P    package definition (reported as type 'i' in older versions)
      t    type indicater such as %,$,@,*,&,sub
      w    bare word (perhaps a subroutine call)
      i    identifier of some type (with leading %, $, @, *, &, sub, -> )
@@ -10142,9 +10142,9 @@ BEGIN {
  
      # make a hash of all valid token types for self-checking the tokenizer
      # (adding NEW_TOKENS : select a new character and add to this list)
-    # fix for c250: added new token type 'P'
+    # fix for c250: added new token type 'P' and 'S'
      my @valid_token_types = qw#
-      A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v P
+      A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v P S
        { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
        #;
      push( @valid_token_types, @digraphs );
author	Steve Hancock <perltidy@users.sourceforge.net>
	Fri, 18 Aug 2023 19:21:00 +0000 (12:21 -0700)
committer	Steve Hancock <perltidy@users.sourceforge.net>
	Fri, 18 Aug 2023 19:21:00 +0000 (12:21 -0700)
lib/Perl/Tidy/Formatter.pm		patch \| blob \| history
lib/Perl/Tidy/HtmlWriter.pm		patch \| blob \| history
lib/Perl/Tidy/Tokenizer.pm		patch \| blob \| history