From 7d23bf444503eaaf8a758744dccc86548c661a19 Mon Sep 17 00:00:00 2001
From: Steve Hancock <perltidy@users.sourceforge.net>
Date: Tue, 13 Apr 2021 20:51:42 -0700
Subject: [PATCH] Correct brace types mismarked by tokenizer

---
 lib/Perl/Tidy/Formatter.pm | 79 ++++++++++++++++++++++++++++----------
 lib/Perl/Tidy/Tokenizer.pm | 12 +++++-
 local-docs/BugLog.pod      | 13 ++++++-
 3 files changed, 81 insertions(+), 23 deletions(-)

diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm
index a273e9f8..6cc468c6 100644
--- a/lib/Perl/Tidy/Formatter.pm
+++ b/lib/Perl/Tidy/Formatter.pm
@@ -5957,34 +5957,59 @@ sub respace_tokens {
         my $line_diff = $lx_close - $lx_open;
         $ris_broken_container->{$seqno} = $line_diff;
 
-        # Handle code blocks
+        # See if this is a list
+        my $is_list;
+        my $rtype_count = $rtype_count_by_seqno->{$seqno};
+        if ($rtype_count) {
+            my $comma_count     = $rtype_count->{','};
+            my $fat_comma_count = $rtype_count->{'=>'};
+            my $semicolon_count = $rtype_count->{';'};
+
+            # We will define a list to be a container with one or more commas
+            # and no semicolons.
+            $is_list =
+              ( $comma_count || $fat_comma_count ) && !$semicolon_count;
+
+        }
+
+        # Look for a block brace marked as uncertain.  If the tokenizer thinks
+        # its guess is uncertain for the type of a brace following an unknown
+        # bareword then it adds a trailing space as a signal.  We can fix the
+        # type here now that we have had a better look at the contents of the
+        # container. This fixes case b1085. To find the corresponding code in
+        # Tokenizer.pm search for 'b1085' with an editor.
         my $block_type = $rLL_new->[$K_opening]->[_BLOCK_TYPE_];
-        if ($block_type) {
+        if ( $block_type && substr( $block_type, -1, 1 ) eq ' ' ) {
 
-            # The -lp option needs to know if a container holds a code block
-            next unless ($rOpts_line_up_parentheses);
+            # Always remove the trailing space
+            $block_type =~ s/\s+$//;
 
-            my $seqno_parent = $rparent_of_seqno->{$seqno};
-            while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
-                $rhas_code_block->{$seqno_parent}        = 1;
-                $rhas_broken_code_block->{$seqno_parent} = $line_diff;
-                $seqno_parent = $rparent_of_seqno->{$seqno_parent};
+            # Try to filter out parenless sub calls
+            my ( $Knn1, $Knn2 );
+            my ( $type_nn1, $type_nn2 ) = ( 'b', 'b' );
+            $Knn1 = $self->K_next_nonblank( $K_opening, $rLL_new );
+            $Knn2 = $self->K_next_nonblank( $Knn1, $rLL_new ) if defined($Knn1);
+            $type_nn1 = $rLL_new->[$Knn1]->[_TYPE_] if ( defined($Knn1) );
+            $type_nn2 = $rLL_new->[$Knn2]->[_TYPE_] if ( defined($Knn2) );
+            if ( $type_nn1 eq 'w' && $type_nn2 =~ /^[wiqQGCZ]$/ ) {
+                $is_list = 0;
             }
 
-            next;
+            # Convert to a hash brace if it looks like it holds a list
+            if ($is_list) {
+
+                $block_type = "";
+
+                $rLL_new->[$K_opening]->[_CI_LEVEL_] = 1;
+                $rLL_new->[$K_closing]->[_CI_LEVEL_] = 1;
+            }
+
+            $rLL_new->[$K_opening]->[_BLOCK_TYPE_] = $block_type;
+            $rLL_new->[$K_closing]->[_BLOCK_TYPE_] = $block_type;
         }
 
-        # Handle lists
-        my $rtype_count = $rtype_count_by_seqno->{$seqno};
-        next unless ($rtype_count);
-        my $comma_count     = $rtype_count->{','};
-        my $fat_comma_count = $rtype_count->{'=>'};
-        my $semicolon_count = $rtype_count->{';'};
-
-        # We will define a list to be a container with one or more commas and
-        # no semicolons.
-        my $is_list = ( $comma_count || $fat_comma_count ) && !$semicolon_count;
-        if ($is_list) {
+        # Handle a list container
+        if ( $is_list && !$block_type ) {
             $ris_list_by_seqno->{$seqno} = $seqno;
             my $seqno_parent = $rparent_of_seqno->{$seqno};
             my $depth        = 0;
@@ -6021,6 +6046,17 @@ sub respace_tokens {
                 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
             }
         }
+
+        # Handle code blocks ...
+        # The -lp option needs to know if a container holds a code block
+        elsif ( $block_type && $rOpts_line_up_parentheses ) {
+            my $seqno_parent = $rparent_of_seqno->{$seqno};
+            while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
+                $rhas_code_block->{$seqno_parent}        = 1;
+                $rhas_broken_code_block->{$seqno_parent} = $line_diff;
+                $seqno_parent = $rparent_of_seqno->{$seqno_parent};
+            }
+        }
     }
 
     # Find containers with ternaries, needed for -lp formatting.
@@ -22063,3 +22099,4 @@ sub wrapup {
 
 } ## end package Perl::Tidy::Formatter
 1;
+
diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm
index d7856d9f..b1fe0741 100644
--- a/lib/Perl/Tidy/Tokenizer.pm
+++ b/lib/Perl/Tidy/Tokenizer.pm
@@ -5352,6 +5352,16 @@ sub decide_if_code_block {
                 $code_block_type = "";
             }
         }
+
+        if ( $code_block_type && $pre_types[$j] eq '}' ) {
+
+            # Patch for case b1085: if we hit the sentinel token then it is
+            # uncertain if this is a block.  If this brace follows a bareword,
+            # then append a space as a signal to the formatter that this may
+            # not be a block brace.  To find the corresponding code in
+            # Formatter.pm search for 'b1085'.
+            $code_block_type .= " " if ( $code_block_type =~ /^\w/ );
+        }
     }
 
     return $code_block_type;
@@ -7698,7 +7708,7 @@ sub scan_number_do {
            |([0-7_]+               # string of octal digits 
            (\.([0-7][0-7_]*)?)?    # optional decimal and fraction
            [Pp][+-]?[0-7]          # REQUIRED exponent, no underscore
-           [0-7_]*)                # Additional exponent digits, with underscores
+           [0-7_]*)                # Additional exponent digits with underscores
 
            # or octal integer
            |([0-7_]+)               # string of octal digits 
diff --git a/local-docs/BugLog.pod b/local-docs/BugLog.pod
index c9b3405e..846cac82 100644
--- a/local-docs/BugLog.pod
+++ b/local-docs/BugLog.pod
@@ -3,6 +3,17 @@
 
 =over 4
 
+=item B<Correct brace types mismarked by tokenizer>
+
+Testing with random parameters produced a case in which a brace following an
+unknown bareword was marked by the tokenizer as a code block brace rather than
+a hash brace.  This can cause poor formatting.  The problem was solved by
+having the tokenizer signal the formatter when its block type is only a
+guess.  The formatter has more information and can fix the problem.  This
+fixes case b1085.
+
+11 Apr 2021.
+
 =item B<Unify coding for welded quotes and other welded containers>
 
 Random testing produced some cases where welded quotes were not converging.
@@ -12,7 +23,7 @@ to a new common sub.
 
 This update fixes cases b1066 b1067 b1071 b1079 b1080.
 
-10 Apr 2021.
+10 Apr 2021, 5d73dd5.
 
 =item B<Slight change in weld length calculation>
 
-- 
2.39.5