From 3a91e634504cd50eb570f7fc0c049cdb91e36b5e Mon Sep 17 00:00:00 2001
From: Steve Hancock <perltidy@users.sourceforge.net>
Date: Thu, 17 Dec 2020 16:12:25 -0800
Subject: [PATCH] rewrote sub is_list() to allow future generalization

---
 bin/perltidy               | 22 ++++++++++++++++++++
 lib/Perl/Tidy/Formatter.pm | 41 +++++++++++++++++++-------------------
 2 files changed, 42 insertions(+), 21 deletions(-)
diff --git a/bin/perltidy b/bin/perltidy
index 57113b91..f4c2c833 100755
--- a/bin/perltidy
+++ b/bin/perltidy
@@ -1191,6 +1191,28 @@ If formatted in this way, the program will not run (at least with recent version
 
 Related issues arise with other binary operator symbols, such as + and -, and in older versions of perl there could be problems with ternary operators.  So to avoid changing program behavior, perltidy has the simple rule that whitespace around possible filehandles is left unchanged.  Likewise, whitespace around barewords is left unchanged.  The reason is that if the barewords are defined in other modules, or in code that has not even been written yet, perltidy will not have seen their prototypes and must treat them cautiously.
 
+In perltidy this is implemented in the tokenizer by marking the token following a
+B<print> keyword as a special type B<Z>.  When formatting is being done,
+whitespace following this token type is generally left unchanged as a precaution
+against changing program behavior.  This is excessively conservative but simple
+and easy to implement.  Keywords which are treated similarly to B<print> include
+B<printf>, B<sort>, B<exec>, and B<system>.  Changes in spacing around parameters
+following these keywords may have to be made manually.  For example, the space,
+or lack of space, after the parameter $foo in the following line will be
+unchanged in formatting.
+
+   system($foo );
+   system($foo);
+
+To find out whether a token is of type B<Z> you can use B<perltidy -DEBUG>. For the
+first line above the result is
+
+   1: system($foo );
+   1: kkkkkk{ZZZZb};
+
+which shows that B<system> is type B<k> (keyword) and $foo is type B<Z>.
+
+
 =item Note2: Perltidy's whitespace rules are not perfect
 
 Despite these precautions, it is still possible to introduce syntax errors with
diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm
index 6e27ccfe..997ea8d0 100644
--- a/lib/Perl/Tidy/Formatter.pm
+++ b/lib/Perl/Tidy/Formatter.pm
@@ -350,6 +350,7 @@ BEGIN {
         _ris_bli_container_       => $i++,
         _rparent_of_seqno_        => $i++,
         _rchildren_of_seqno_      => $i++,
+        _ris_list_by_seqno_       => $i++,
         _rbreak_container_        => $i++,
         _rshort_nested_           => $i++,
         _length_function_         => $i++,
@@ -684,6 +685,7 @@ sub new {
     $self->[_ris_bli_container_]     = {};
     $self->[_rparent_of_seqno_]      = {};
     $self->[_rchildren_of_seqno_]    = {};
+    $self->[_ris_list_by_seqno_]     = {};
     $self->[_rbreak_container_]      = {};    # prevent one-line blocks
     $self->[_rshort_nested_]         = {};    # blocks not forced open
     $self->[_length_function_]       = $length_function;
@@ -5633,6 +5635,21 @@ sub respace_tokens {
         $self->[_K_first_seq_item_] = $KNEXT;
     }
 
+    # find and remember lists by sequence number
+    # TODO: eventually this should hold a name for the list
+    my $ris_list_by_seqno = {};
+    foreach my $seqno ( keys %{$K_opening_container} ) {
+        my $K_opening  = $K_opening_container->{$seqno};
+        my $block_type = $rLL_new->[$K_opening]->[_BLOCK_TYPE_];
+        next if ($block_type);
+        my $rtype_count = $rtype_count_by_seqno->{$seqno};
+        next unless ($rtype_count);
+        my $fat_comma_count = $rtype_count->{'=>'};
+        my $comma_count     = $rtype_count->{','};
+        my $is_list = ( $fat_comma_count || $comma_count && $comma_count > 1 );
+        $ris_list_by_seqno->{$seqno} = $is_list;
+    }
+
     # Reset memory to be the new array
     $self->[_rLL_] = $rLL_new;
     my $Klimit;
@@ -5648,6 +5665,7 @@ sub respace_tokens {
     $self->[_rhas_broken_container_] = $rhas_broken_container;
     $self->[_rparent_of_seqno_]      = $rparent_of_seqno;
     $self->[_rchildren_of_seqno_]    = $rchildren_of_seqno;
+    $self->[_ris_list_by_seqno_]     = $ris_list_by_seqno;
 
     # DEBUG OPTION: make sure the new array looks okay.
     # This is no longer needed but should be retained for future development.
@@ -5866,28 +5884,9 @@ sub is_list {
 
     # Return true if the immediate contents of a container appears to be a
     # list.
-
     my ( $self, $seqno ) = @_;
     return unless defined($seqno);
-
-    my $K_opening_container = $self->[_K_opening_container_];
-    my $K_opening           = $K_opening_container->{$seqno};
-    return unless ( defined($K_opening) );
-
-    my $rLL        = $self->[_rLL_];
-    my $block_type = $rLL->[$K_opening]->[_BLOCK_TYPE_];
-    return if ($block_type);
-
-    my $token = $rLL->[$K_opening]->[_TOKEN_];
-    return if ( $token eq ':' );
-
-    # We will require at least 2 commas or 1 fat comma in the
-    # immediate lower level.
-    my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
-    my $fat_comma_count      = $rtype_count_by_seqno->{$seqno}->{'=>'};
-    my $comma_count          = $rtype_count_by_seqno->{$seqno}->{','};
-    my $is_list = ( $fat_comma_count || $comma_count && $comma_count > 1 );
-    return $is_list;
+    return $self->[_ris_list_by_seqno_]->{$seqno};
 }
 
 sub resync_lines_and_tokens {
@@ -11063,7 +11062,7 @@ sub insert_additional_breaks {
             && $i_break_right <= $i_l )
         {
             splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
-            splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
+            splice( @{$ri_last},  $line_number, 1, ( $i_break_left, $i_l ) );
         }
     }
     return;
-- 
2.39.5