Correct brace types mismarked by tokenizer

author Steve Hancock <perltidy@users.sourceforge.net>

Wed, 14 Apr 2021 03:51:42 +0000 (20:51 -0700)

committer Steve Hancock <perltidy@users.sourceforge.net>

Wed, 14 Apr 2021 03:51:42 +0000 (20:51 -0700)
author Steve Hancock <perltidy@users.sourceforge.net>
Wed, 14 Apr 2021 03:51:42 +0000 (20:51 -0700)
committer Steve Hancock <perltidy@users.sourceforge.net>
Wed, 14 Apr 2021 03:51:42 +0000 (20:51 -0700)
diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm

index a273e9f8111aa778da6277c7660e8812f9c02120..6cc468c6bbc02454755d422029e4588c9c11a761 100644 (file)
--- a/lib/Perl/Tidy/Formatter.pm
+++ b/lib/Perl/Tidy/Formatter.pm
@@ -5957,34 +5957,59 @@ sub respace_tokens {
          my $line_diff = $lx_close - $lx_open;
          $ris_broken_container->{$seqno} = $line_diff;
  
-        # Handle code blocks
+        # See if this is a list
+        my $is_list;
+        my $rtype_count = $rtype_count_by_seqno->{$seqno};
+        if ($rtype_count) {
+            my $comma_count     = $rtype_count->{','};
+            my $fat_comma_count = $rtype_count->{'=>'};
+            my $semicolon_count = $rtype_count->{';'};
+
+            # We will define a list to be a container with one or more commas
+            # and no semicolons.
+            $is_list =
+              ( $comma_count || $fat_comma_count ) && !$semicolon_count;
+
+        }
+
+        # Look for a block brace marked as uncertain.  If the tokenizer is
+        # unsure of the type of a brace following an unknown bareword, it
+        # appends a trailing space to the block type as a signal.  We can fix
+        # the type here now that we have had a better look at the contents of
+        # the container. This fixes case b1085. To find the corresponding code
+        # in Tokenizer.pm search for 'b1085' with an editor.
          my $block_type = $rLL_new->[$K_opening]->[_BLOCK_TYPE_];
-        if ($block_type) {
+        if ( $block_type && substr( $block_type, -1, 1 ) eq ' ' ) {
  
-            # The -lp option needs to know if a container holds a code block
-            next unless ($rOpts_line_up_parentheses);
+            # Always remove the trailing space
+            $block_type =~ s/\s+$//;
  
-            my $seqno_parent = $rparent_of_seqno->{$seqno};
-            while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
-                $rhas_code_block->{$seqno_parent}        = 1;
-                $rhas_broken_code_block->{$seqno_parent} = $line_diff;
-                $seqno_parent = $rparent_of_seqno->{$seqno_parent};
+            # Try to filter out parenless sub calls
+            my ( $Knn1, $Knn2 );
+            my ( $type_nn1, $type_nn2 ) = ( 'b', 'b' );
+            $Knn1 = $self->K_next_nonblank( $K_opening, $rLL_new );
+            $Knn2 = $self->K_next_nonblank( $Knn1, $rLL_new ) if defined($Knn1);
+            $type_nn1 = $rLL_new->[$Knn1]->[_TYPE_] if ( defined($Knn1) );
+            $type_nn2 = $rLL_new->[$Knn2]->[_TYPE_] if ( defined($Knn2) );
+            if ( $type_nn1 eq 'w' && $type_nn2 =~ /^[wiqQGCZ]$/ ) {
+                $is_list = 0;
              }
  
-            next;
+            # Convert to a hash brace if it looks like it holds a list
+            if ($is_list) {
+
+                $block_type = "";
+
+                $rLL_new->[$K_opening]->[_CI_LEVEL_] = 1;
+                $rLL_new->[$K_closing]->[_CI_LEVEL_] = 1;
+            }
+
+            $rLL_new->[$K_opening]->[_BLOCK_TYPE_] = $block_type;
+            $rLL_new->[$K_closing]->[_BLOCK_TYPE_] = $block_type;
          }
  
-        # Handle lists
-        my $rtype_count = $rtype_count_by_seqno->{$seqno};
-        next unless ($rtype_count);
-        my $comma_count     = $rtype_count->{','};
-        my $fat_comma_count = $rtype_count->{'=>'};
-        my $semicolon_count = $rtype_count->{';'};
-
-        # We will define a list to be a container with one or more commas and
-        # no semicolons.
-        my $is_list = ( $comma_count || $fat_comma_count ) && !$semicolon_count;
-        if ($is_list) {
+        # Handle a list container
+        if ( $is_list && !$block_type ) {
              $ris_list_by_seqno->{$seqno} = $seqno;
              my $seqno_parent = $rparent_of_seqno->{$seqno};
              my $depth        = 0;
@@ -6021,6 +6046,17 @@ sub respace_tokens {
                  $seqno_parent = $rparent_of_seqno->{$seqno_parent};
              }
          }
+
+        # Handle code blocks ...
+        # The -lp option needs to know if a container holds a code block
+        elsif ( $block_type && $rOpts_line_up_parentheses ) {
+            my $seqno_parent = $rparent_of_seqno->{$seqno};
+            while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
+                $rhas_code_block->{$seqno_parent}        = 1;
+                $rhas_broken_code_block->{$seqno_parent} = $line_diff;
+                $seqno_parent = $rparent_of_seqno->{$seqno_parent};
+            }
+        }
      }
  
      # Find containers with ternaries, needed for -lp formatting.
@@ -22063,3 +22099,4 @@ sub wrapup {
  
  } ## end package Perl::Tidy::Formatter
  1;
+
diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm

index d7856d9ff7854cec0586d2b40364eaeddaa80691..b1fe0741a5ec5709fb71a09829f8be64d4101e81 100644 (file)
--- a/lib/Perl/Tidy/Tokenizer.pm
+++ b/lib/Perl/Tidy/Tokenizer.pm
@@ -5352,6 +5352,16 @@ sub decide_if_code_block {
                  $code_block_type = "";
              }
          }
+
+        if ( $code_block_type && $pre_types[$j] eq '}' ) {
+
+            # Patch for case b1085: if we hit the sentinel token then it is
+            # uncertain if this is a block.  If this brace follows a bareword,
+            # then append a space as a signal to the formatter that this may
+            # not be a block brace.  To find the corresponding code in
+            # Formatter.pm search for 'b1085'.
+            $code_block_type .= " " if ( $code_block_type =~ /^\w/ );
+        }
      }
  
      return $code_block_type;
@@ -7698,7 +7708,7 @@ sub scan_number_do {
             |([0-7_]+               # string of octal digits 
             (\.([0-7][0-7_]*)?)?    # optional decimal and fraction
             [Pp][+-]?[0-7]          # REQUIRED exponent, no underscore
-           [0-7_]*)                # Additional exponent digits, with underscores
+           [0-7_]*)                # Additional exponent digits with underscores
  
             # or octal integer
             |([0-7_]+)               # string of octal digits 
diff --git a/local-docs/BugLog.pod b/local-docs/BugLog.pod

index c9b3405e17b1dbd6ea0c547a522442841c3a109d..846cac82785753e823c9c0e394c8ea474e0f82b3 100644 (file)
--- a/local-docs/BugLog.pod
+++ b/local-docs/BugLog.pod
@@ -3,6 +3,17 @@
  
  =over 4
  
+=item B<Correct brace types mismarked by tokenizer>
+
+Testing with random parameters produced a case in which a brace following an
+unknown bareword was marked by the tokenizer as a code block brace rather than
+a hash brace.  This can cause poor formatting.  The problem was solved by
+having the tokenizer signal the formatter whenever a block type is a guess.
+The formatter sees the container contents and can correct the brace type.
+This fixes case b1085.
+
+11 Apr 2021.
+
  =item B<Unify coding for welded quotes and other welded containers>
  
  Random testing produced some cases where welded quotes were not converging.
@@ -12,7 +23,7 @@ to a new common sub.
  
  This update fixes cases b1066 b1067 b1071 b1079 b1080.
  
-10 Apr 2021.
+10 Apr 2021, 5d73dd5.
  
  =item B<Slight change in weld length calculation>
author	Steve Hancock <perltidy@users.sourceforge.net>
	Wed, 14 Apr 2021 03:51:42 +0000 (20:51 -0700)
committer	Steve Hancock <perltidy@users.sourceforge.net>
	Wed, 14 Apr 2021 03:51:42 +0000 (20:51 -0700)
lib/Perl/Tidy/Formatter.pm		patch \| blob \| history
lib/Perl/Tidy/Tokenizer.pm		patch \| blob \| history
local-docs/BugLog.pod		patch \| blob \| history