fix issue c250 part 1, new package token type P

author Steve Hancock <perltidy@users.sourceforge.net>

Thu, 17 Aug 2023 14:42:31 +0000 (07:42 -0700)

committer Steve Hancock <perltidy@users.sourceforge.net>

Thu, 17 Aug 2023 14:42:31 +0000 (07:42 -0700)
author Steve Hancock <perltidy@users.sourceforge.net>
Thu, 17 Aug 2023 14:42:31 +0000 (07:42 -0700)
committer Steve Hancock <perltidy@users.sourceforge.net>
Thu, 17 Aug 2023 14:42:31 +0000 (07:42 -0700)
diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm

index f44a2200c14f625b9922fff6ad89f81040c56caf..ed0139a7d1383c4dec1c2df6c90f8b287b1b344e 100644 (file)
--- a/lib/Perl/Tidy/Formatter.pm
+++ b/lib/Perl/Tidy/Formatter.pm
@@ -2801,10 +2801,11 @@ sub initialize_whitespace_hashes {
      # simple as adding your new letter to @spaces_both_sides, for
      # example.
  
+    # fix for c250: added space rules new package type 'P'
      my @spaces_both_sides = qw#
        + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
        .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
-      &&= ||= //= <=> A k f w F n C Y U G v
+      &&= ||= //= <=> A k f w F n C Y U G v P
        #;
  
      my @spaces_left_side = qw<
@@ -4048,6 +4049,8 @@ EOM
      # These routines and variables are involved in deciding where to break very
      # long lines.
  
+    # NEW_TOKENS must add bond strength rules
+
      my %is_good_keyword_breakpoint;
      my %is_lt_gt_le_ge;
      my %is_container_token;
@@ -4179,6 +4182,10 @@ EOM
          $left_bond_strength{'CORE::'}  = NOMINAL;
          $right_bond_strength{'CORE::'} = NO_BREAK;
  
+        # Fix for c250: added strengths for new type 'P'
+        $left_bond_strength{'P'}  = NOMINAL;
+        $right_bond_strength{'P'} = NOMINAL;
+
          # breaking AFTER modulus operator is ok:
          @q = qw< % >;
          @left_bond_strength{@q} = (STRONG) x scalar(@q);
@@ -6442,9 +6449,10 @@ sub find_selected_packages {
      foreach my $KK ( 0 .. $Klimit ) {
          my $item = $rLL->[$KK];
          my $type = $item->[_TYPE_];
-        if ( $type ne 'i' ) {
-            next;
-        }
+
+        # fix for c250: package type has changed from 'i' to 'P'
+        next if ( $type ne 'P' );
+
          my $token = $item->[_TOKEN_];
          if (   substr( $token, 0, 7 ) eq 'package' && $token =~ /^package\s/
              || substr( $token, 0, 5 ) eq 'class' && $token =~ /^class\s/ )
@@ -8075,7 +8083,7 @@ sub dump_verbatim {
  
  my %wU;
  my %wiq;
-my %is_wit;
+my %is_witP;
  my %is_sigil;
  my %is_nonlist_keyword;
  my %is_nonlist_type;
@@ -8092,8 +8100,8 @@ BEGIN {
      @q = qw(w i q Q G C Z);
      @{wiq}{@q} = (1) x scalar(@q);
  
-    @q = qw(w i t);
-    @{is_wit}{@q} = (1) x scalar(@q);
+    @q = qw(w i t P);    # Fix for c250: added new type 'P', formerly 'i'
+    @{is_witP}{@q} = (1) x scalar(@q);
  
      @q = qw($ & % * @);
      @{is_sigil}{@q} = (1) x scalar(@q);
@@ -8632,7 +8640,7 @@ sub respace_tokens_inner_loop {
          # The following is not yet done, but could be:
          #   sub (x x x)
          #     ( $type =~ /^[wit]$/ )
-        elsif ( $is_wit{$type} ) {
+        elsif ( $is_witP{$type} ) {
  
              # index() is several times faster than a regex test with \s here
              ##   $token =~ /\s/
@@ -8688,16 +8696,6 @@ sub respace_tokens_inner_loop {
  
                      }
  
-                    # clean up spaces in package identifiers, like
-                    #   "package        Bob::Dog;"
-                    elsif ( $token =~ /^(package|class)\s/ ) {
-                        $token =~ s/\s+/ /g;
-                        $rtoken_vars->[_TOKEN_] = $token;
-
-                        $self->[_ris_special_identifier_token_]->{$token} =
-                          'package';
-                    }
-
                      # trim identifiers of trailing blanks which can occur
                      # under some unusual circumstances, such as if the
                      # identifier 'witch' has trailing blanks on input here:
@@ -8718,6 +8716,18 @@ sub respace_tokens_inner_loop {
                          $rtoken_vars->[_TOKEN_] = $token;
                      }
                  }
+
+                # and trim spaces in package statements (added for c250)
+                elsif ( $type eq 'P' ) {
+
+                    # clean up spaces in package identifiers, like
+                    #   "package        Bob::Dog;"
+                    if ( $token =~ s/\s+/ /g ) {
+                        $rtoken_vars->[_TOKEN_] = $token;
+                        $self->[_ris_special_identifier_token_]->{$token} =
+                          'package';
+                    }
+                }
              }
          }
  
@@ -18111,7 +18121,8 @@ EOM
              }
  
              # blank lines before subs except declarations and one-liners
-            elsif ( $leading_type eq 'i' ) {
+            # Fix for c250: added new type 'P'
+            elsif ( $leading_type eq 'i' || $leading_type eq 'P' ) {
                  my $special_identifier =
                    $self->[_ris_special_identifier_token_]->{$leading_token};
                  if ($special_identifier) {
diff --git a/lib/Perl/Tidy/HtmlWriter.pm b/lib/Perl/Tidy/HtmlWriter.pm

index c5f0b315fe408fcae621b2d794e8560b621dfe34..7c751c62745b27e2fb4db2fe9e941cedd6fb681f 100644 (file)
--- a/lib/Perl/Tidy/HtmlWriter.pm
+++ b/lib/Perl/Tidy/HtmlWriter.pm
@@ -391,12 +391,14 @@ BEGIN {
          '('  => 'p',
          ')'  => 'p',
          'M'  => 'm',
-        'P'  => 'pd',
+        'pd' => 'pd',
          'A'  => 'co',
      );
  
      # These token types will all be called identifiers for now
-    my @identifier = qw< i t U C Y Z G :: CORE::>;
+    # Fix for c250: added new type 'P', formerly 'i'
+    # ( but package statements will eventually be split into 'k' and 'i')
+    my @identifier = qw< i t U C Y Z G P :: CORE::>;
      @token_short_names{@identifier} = ('i') x scalar(@identifier);
  
      # These token types will be called 'structure'
@@ -1341,7 +1343,8 @@ sub markup_tokens {
          # into keyword 'package' and name; add to the toc,
          # and update the package stack
          #-------------------------------------------------------
-        if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) {
+        # Fix for c250: switch from 'i' to 'P' and allow 'class' or 'package'
+        if ( $type eq 'P' && $token =~ /^(\w+\s+)(\w.*)$/ ) {
              $token = $self->markup_html_element( $1, 'k' );
              push @colored_tokens, $token;
              $token = $2;
@@ -1459,7 +1462,10 @@ sub write_line {
              $self->add_toc_item( '__DATA__', '__DATA__' );
          }
          elsif ( $line_type =~ /^POD/ ) {
-            $line_character = 'P';
+
+            # fix for c250: changed 'P' to 'pd' here and in %token_short_names
+            # to allow use of 'P' as new package token type
+            $line_character = 'pd';
              if ( $rOpts->{'pod2html'} ) {
                  my $html_pod_fh = $self->{_html_pod_fh};
                  if ( $line_type eq 'POD_START' ) {
diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm

index 83c810750681e3207d4c4188f5342b7eca563d13..24634f76a931eb5e116d849e46e814af5b376b74 100644 (file)
--- a/lib/Perl/Tidy/Tokenizer.pm
+++ b/lib/Perl/Tidy/Tokenizer.pm
@@ -3100,13 +3100,18 @@ EOM
          $container_type = EMPTY_STRING;
  
          # ATTRS: for a '{' following an attribute list, reset
-        # things to look like we just saw the sub name
+        # things to look like we just saw a sub name
          # Added 'package' (can be 'class') for --use-feature=class (rt145706)
-        if ( $statement_type =~ /^(sub|package)\b/ ) {
+        if ( substr( $statement_type, 0, 3 ) eq 'sub' ) {
              $last_nonblank_token = $statement_type;
              $last_nonblank_type  = 'i';
              $statement_type      = EMPTY_STRING;
          }
+        elsif ( substr( $statement_type, 0, 7 ) eq 'package' ) {
+            $last_nonblank_token = $statement_type;
+            $last_nonblank_type  = 'P';               # c250 change
+            $statement_type      = EMPTY_STRING;
+        }
  
          # patch for SWITCH/CASE: hide these keywords from an immediately
          # following opening brace
@@ -5276,16 +5281,16 @@ EOM
              # this pre-token will start an output token
              push( @{$routput_token_list}, $i_tok );
  
-            # The search for the full token ends in one of 5 main end NODES
+            # The search for the full token ends in one of 5 main END NODES
  
-            #----------------------------------
-            # NODE 1: handle a whitespace token
-            #----------------------------------
+            #-----------------------
+            # END NODE 1: whitespace
+            #-----------------------
              next if ( $pre_type eq 'b' );
  
-            #-------------------------
-            # NODE 2: handle a comment
-            #-------------------------
+            #----------------------
+            # END NODE 2: a comment
+            #----------------------
              last if ( $pre_type eq '#' );
  
              # continue gathering identifier if necessary
@@ -5442,9 +5447,9 @@ EOM
              # Now we have to examine this token and decide what it is
              # and define its $type
  
-            #---------------------------
-            # NODE 3: handle a bare word
-            #---------------------------
+            #------------------------
+            # END NODE 3: a bare word
+            #------------------------
              if ( $pre_type eq 'w' ) {
                  my $is_last = $self->do_BAREWORD($is_END_or_DATA);
                  last if ($is_last);
@@ -5455,17 +5460,17 @@ EOM
              # Added '#' to fix c038 (later moved above).
              $self->[_in_attribute_list_] &&= 0;
  
-            #----------------------------------
-            # NODE 4: handle a string of digits
-            #----------------------------------
+            #-------------------------------
+            # END NODE 4: a string of digits
+            #-------------------------------
              if ( $pre_type eq 'd' ) {
                  $self->do_DIGITS();
                  next;
              }
  
-            #--------------------------------
-            # NODE 5: handle all other tokens
-            #--------------------------------
+            #-----------------------------
+            # END NODE 5: all other tokens
+            #-----------------------------
              my $code = $tokenization_code->{$tok};
              if ($code) {
                  $code->($self);
@@ -5777,8 +5782,10 @@ BEGIN {
  
      # Always expecting TERM following these types:
      # note: this is identical to '@value_requestor_type' defined later.
+    # Fix for c250: add new type 'P' for package (expecting VERSION or {}
+    # after package NAMESPACE, so expecting TERM)
      my @q = qw(
-      ; ! + x & ?  F J - p / Y : % f U ~ A G j L * . | ^ < = [ m { \ > t
+      ; ! + x & ?  F J - p / Y : % f U ~ A G j L P * . | ^ < = [ m { \ > t
        || >= != mm *= => .. !~ == && |= .= pp -= =~ += <= %= ^= x= ~~ ** << /=
        &= // >> ~. &. |. ^.
        ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~
@@ -5798,7 +5805,8 @@ BEGIN {
      # 'i' is currently excluded because it might be a package
      # 'q' is currently excluded because it might be a prototype
      # Fix for c030: removed '->' from this list:
-    @q = qw( -- C h R ++ ] Q <> );    ## n v q i );
+    # Fix for c250: added 'i' after new type 'P' added
+    @q = qw( -- C h R ++ ] Q <> i );    ## n v q );
      push @q, ')';
      @{op_expected_table}{@q} = (OPERATOR) x scalar(@q);
  
@@ -5891,25 +5899,13 @@ sub operator_expected {
      #---------------------------------------------
  
      # Types 'k', '}' and 'Z' depend on context
-    # Types 'i', 'n', 'v', 'q' currently also temporarily depend on context.
+    # Types 'n', 'v', 'q' also depend on context.
  
      # identifier...
-    if ( $last_nonblank_type eq 'i' ) {
-        $op_expected = OPERATOR;
-
-        # TODO: it would be cleaner to make this a special type
-        # expecting VERSION or {} after package NAMESPACE;
-        # maybe mark these words as type 'Y'?
-        if (   substr( $last_nonblank_token, 0, 7 ) eq 'package'
-            && $statement_type      =~ /^package\b/
-            && $last_nonblank_token =~ /^package\b/ )
-        {
-            $op_expected = TERM;
-        }
-    }
+    # Fix for c250: type 'i' and new type 'P' are in the hash table now
  
      # keyword...
-    elsif ( $last_nonblank_type eq 'k' ) {
+    if ( $last_nonblank_type eq 'k' ) {
          $op_expected = TERM;
          if ( $expecting_operator_token{$last_nonblank_token} ) {
              $op_expected = OPERATOR;
@@ -6264,8 +6260,16 @@ sub code_block_type {
      }
  
      # or a sub or package BLOCK
-    elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
-        && $last_nonblank_token =~ /^(sub|package)\b/ )
+    # Fixed for c250 to include new package type 'P'
+    # FIXME: this could use optimization
+    elsif (
+        (
+               $last_nonblank_type eq 'i'
+            || $last_nonblank_type eq 't'
+            || $last_nonblank_type eq 'P'
+        )
+        && $last_nonblank_token =~ /^(sub|package)\b/
+      )
      {
          return $last_nonblank_token;
      }
@@ -7586,7 +7590,7 @@ sub do_scan_package {
          my $pos  = pos($input_line);
          my $numc = $pos - $pos_beg;
          $tok  = 'package ' . substr( $input_line, $pos_beg, $numc );
-        $type = 'i';
+        $type = 'P';    # Fix for c250, previously 'i'
  
          # Now we must convert back from character position
          # to pre_token index.
@@ -10072,7 +10076,7 @@ The following additional token types are defined:
      U    user-defined function taking parameters
      G    user-defined function taking block parameter (like grep/map/eval)
      M    (unused, but reserved for subroutine definition name)
-    P    (unused, but -html uses it to label pod text)
+    P    package definition
      t    type indicater such as %,$,@,*,&,sub
      w    bare word (perhaps a subroutine call)
      i    identifier of some type (with leading %, $, @, *, &, sub, -> )
@@ -10138,8 +10142,9 @@ BEGIN {
  
      # make a hash of all valid token types for self-checking the tokenizer
      # (adding NEW_TOKENS : select a new character and add to this list)
+    # fix for c250: added new token type 'P'
      my @valid_token_types = qw#
-      A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
+      A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v P
        { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
        #;
      push( @valid_token_types, @digraphs );
author	Steve Hancock <perltidy@users.sourceforge.net>
	Thu, 17 Aug 2023 14:42:31 +0000 (07:42 -0700)
committer	Steve Hancock <perltidy@users.sourceforge.net>
	Thu, 17 Aug 2023 14:42:31 +0000 (07:42 -0700)
lib/Perl/Tidy/Formatter.pm		patch \| blob \| history
lib/Perl/Tidy/HtmlWriter.pm		patch \| blob \| history
lib/Perl/Tidy/Tokenizer.pm		patch \| blob \| history