From 3cf92a1ec83e467079d8854c5bdf3a2d814ff762 Mon Sep 17 00:00:00 2001
From: Steve Hancock <perltidy@users.sourceforge.net>
Date: Tue, 20 Oct 2020 08:24:36 -0700
Subject: [PATCH] improved filters for sub is_essential_whitespace

---
 lib/Perl/Tidy/Formatter.pm | 108 +++++++++++++++++++++++++------------
 1 file changed, 73 insertions(+), 35 deletions(-)

diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm
index 98f5196f..d2761fee 100644
--- a/lib/Perl/Tidy/Formatter.pm
+++ b/lib/Perl/Tidy/Formatter.pm
@@ -179,7 +179,6 @@ my (
     %is_equal_or_fat_comma,
     %is_block_with_ci,
     %is_comma_or_fat_comma,
-    %essential_whitespace_not_following,
 
     # Initialized in check_options. These are constants and could
     # just as well be initialized in a BEGIN block.
@@ -538,14 +537,6 @@ BEGIN {
     @q = qw( do sub eval sort map grep );
     @is_block_with_ci{@q} = (1) x scalar(@q);
 
-    # These are used as a speedup filter for sub is_essential_whitespace.
-    # No space is needed after them except for a here doc.
-    @q = qw( ; { } [ ] );
-    push @q, ',';
-    push @q, ')';
-    push @q, '(';
-    @essential_whitespace_not_following{@q} = (1) x scalar(@q);
-
 }
 
 {    ## begin closure to count instanes
@@ -2150,6 +2141,10 @@ EOM
     my %is_for_foreach;
     my %is_digraph;
     my %is_trigraph;
+    my %essential_whitespace_filter_l1;
+    my %essential_whitespace_filter_r1;
+    my %essential_whitespace_filter_l2;
+    my %essential_whitespace_filter_r2;
 
     BEGIN {
 
@@ -2168,6 +2163,32 @@ EOM
 
         @q = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~);
         @is_trigraph{@q} = (1) x scalar(@q);
+
+        # These are used as a speedup filters for sub is_essential_whitespace.
+
+        # Filter 1:
+        # These left side token types USUALLY do not require a space:
+        @q = qw( ; { } [ ] L R );
+        push @q, ',';
+        push @q, ')';
+        push @q, '(';
+        @essential_whitespace_filter_l1{@q} = (1) x scalar(@q);
+
+        # BUT some might if followed by these right token types
+        @q = qw( pp mm << <<= h );
+        @essential_whitespace_filter_r1{@q} = (1) x scalar(@q);
+
+        # Filter 2:
+        # These right side filters usually do not require a space
+        @q = qw( ; ] R } );
+        push @q, ',';
+        push @q, ')';
+        @essential_whitespace_filter_r2{@q} = (1) x scalar(@q);
+
+        # BUT some might if followed by these left token types
+        @q = qw( h Z );
+        @essential_whitespace_filter_l2{@q} = (1) x scalar(@q);
+
     }
 
     sub is_essential_whitespace {
@@ -2179,19 +2200,43 @@ EOM
         # ($tokenr, $typer) is the token to the right of the space in question
         # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
         #
-        # This is a slow routine but is not needed too often except when -mangle
-        # is used.
-        #
-        # Note: This routine should almost never need to be changed.  It is
+        # Note1: This routine should almost never need to be changed.  It is
         # for avoiding syntax problems rather than for formatting.
 
+	# Note2: The -mangle option causes large numbers of calls to this
+	# routine and therefore is a good test. So if a change is made, be sure
+	# to run a large number of files with the -mangle option and check for
+	# differences.
+
         my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
 
-        # speedups
-	# The first is not really essential but retained to keep formatting
-	# unchanged.
-        return 1 if ( $typer eq 'h' ); 
-        return   if ( $essential_whitespace_not_following{$typel} );
+	# This is potentially a very slow routine but the following quick
+	# filters typically catch and handle over 90% of the calls.
+
+        # Filter 1: usually no space required after common types ; , [ ] { } ( )
+        return
+          if ( $essential_whitespace_filter_l1{$typel}
+            && !$essential_whitespace_filter_r1{$typer} );
+
+        # Filter 2: usually no space before common types ; ,
+        return
+          if ( $essential_whitespace_filter_r2{$typer}
+            && !$essential_whitespace_filter_l2{$typel} );
+
+	# Filter 3: Handle side comments: a space is only essential if the left
+	# token ends in '$' For example, we do not want to create $#foo below:
+
+        #   sub t086
+        #       ( #foo)))
+        #       $ #foo)))
+        #       a #foo)))
+        #       ) #foo)))
+        #       { ... }
+
+        if ($typer eq '#' ) {
+            return 1 if ($tokenl && substr($tokenl,-1) eq '$');
+            return;
+        }
 
         my $tokenr_is_bareword   = $tokenr =~ /^\w/ && $tokenr !~ /^\d/;
         my $tokenr_is_open_paren = $tokenr eq '(';
@@ -2871,9 +2916,9 @@ EOM
         # example, to force long string of conditional operators to break
         # with each line ending in a ':', we can add a small number to the
         # bond strength of each ':' (colon.t)
-        @bias_tokens = qw( : && || f and or . );   # tokens which get bias
+        @bias_tokens = qw( : && || f and or . );       # tokens which get bias
         %bias_hash   = map { $_ => 0 } @bias_tokens;
-        $delta_bias  = 0.0001;                     # a very small strength level
+        $delta_bias  = 0.0001;    # a very small strength level
         return;
 
     } ## end sub initialize_bond_strength_hashes
@@ -4883,14 +4928,7 @@ sub respace_tokens {
 
             if (
 
-		# The call to is_essential_whitespace is very slow, so the
-		# following filter is used to eliminate most calls.
-                (
-                      !$essential_whitespace_not_following{$type_p}
-                    || $type_next eq 'h'
-                )
-
-                && is_essential_whitespace(
+                is_essential_whitespace(
                     $token_pp, $type_pp,    $token_p,
                     $type_p,   $token_next, $type_next,
                 )
@@ -5933,7 +5971,7 @@ sub find_nested_pairs {
           )
         {
             next if ( $rLL->[$Kn]->[_TYPE_] eq 'b' );
-            if ( !$nonblank_count ) { $Kn_first = $Kn }
+            if ( !$nonblank_count )        { $Kn_first = $Kn }
             if ( $Kn eq $K_inner_opening ) { $nonblank_count++; last; }
 
             # skip chain of identifier tokens
@@ -5957,8 +5995,8 @@ sub find_nested_pairs {
                 && $rLL->[$K_outer_opening]->[_TOKEN_] eq '(' )
 
             # anonymous sub + prototype or sig:  )->then( sub ($code) {
-	    # ... but it seems best not to stack two structural blocks, like
-	    # this
+            # ... but it seems best not to stack two structural blocks, like
+            # this
             #    sub make_anon_with_my_sub { sub {
             # because it probably hides the structure a little too much.
             || (   $rLL->[$K_inner_opening]->[_BLOCK_TYPE_] eq 'sub'
@@ -9012,7 +9050,7 @@ sub starting_one_line_block {
         # If this brace follows a parenthesized list, we should look back to
         # find the keyword before the opening paren because otherwise we might
         # form a one line block which stays intack, and cause the parenthesized
-        # expression to break open. That looks bad.  
+        # expression to break open. That looks bad.
         if ( $tokens_to_go[$i_start] eq ')' ) {
 
             # Find the opening paren
@@ -9885,9 +9923,9 @@ EOM
             # otherwise use multiple lines
             else {
 
-		# add a couple of extra terminal blank tokens if we haven't
-		# already done so
-                $self->pad_array_to_go() unless ($called_pad_array_to_go); 
+                # add a couple of extra terminal blank tokens if we haven't
+                # already done so
+                $self->pad_array_to_go() unless ($called_pad_array_to_go);
 
                 ( $ri_first, $ri_last ) =
                   $self->set_continuation_breaks( $saw_good_break,
-- 
2.39.5