improve formatting of sort/map/grep blocks

author Steve Hancock <perltidy@users.sourceforge.net>

Mon, 29 May 2023 13:14:27 +0000 (06:14 -0700)

committer Steve Hancock <perltidy@users.sourceforge.net>

Mon, 29 May 2023 13:14:27 +0000 (06:14 -0700)
author Steve Hancock <perltidy@users.sourceforge.net>
Mon, 29 May 2023 13:14:27 +0000 (06:14 -0700)
committer Steve Hancock <perltidy@users.sourceforge.net>
Mon, 29 May 2023 13:14:27 +0000 (06:14 -0700)
diff --git a/docs/ci_update.md b/docs/ci_update.md

index e5bdc7adaeb0e6ec6be3559aa5a3f39df2f7f93a..e729c8b291ed672fc6f6242b6ee8783e8445cee7 100644 (file)
--- a/docs/ci_update.md
+++ b/docs/ci_update.md
@@ -29,9 +29,10 @@ The original continuation indentation programming in perltidy operated in the
  initial pass through a file, and this placed some limits on what it could do.
  This update moves this coding downstream in the processing pipeline, where the
  entire file is accessible with full data structures, and this allows several
-improvements to be made.  These mainly involve either (1) the continuation
+improvements to be made.  These mainly involve (1) the continuation
  indentation assigned to comments in unusual circumstances, or (2) the
-indentation of complex ternary expressions.  Some examples are as follows.
+indentation of complex ternary expressions, or (3) the indentation of
+chains of ``sort/map/grep`` blocks.  Some examples are as follows.
  
  ## Block comment indentation changes before closing braces, brackets and parens
  
@@ -308,3 +309,44 @@ they are a continuation of the previous line.
      }
  ```
  
+## Some improved indentation of filter block chains
+
+The lines of an isolated chain of ``sort/map/grep`` blocks are normally all
+given the same indentation.  For example
+
+```
+            @new_in_dir = (
+                grep { not $seen{$_} }
+                map  { $dir . "/" . $_ }
+                grep { not ignore_file($_) }
+                grep { not $skip{$_} } readdir(D)
+            );
+```
+
+Previously, there were a a number of situations where this could not be
+achieved. As an example, if the above example had side comments then the
+formatting would be
+
+```
+            @new_in_dir = (
+                grep   { not $seen{$_} }          # files not yet processed
+                  map  { $dir . "/" . $_ }        # map from file to dir/file
+                  grep { not ignore_file($_) }    # ignore files in cvsignore
+                  grep { not $skip{$_} }          # skip files to be ignored
+                  readdir(D)
+            );
+```
+
+The first line now has a different indentation from the rest, and this is
+undesirable because ideally indentation should be independent of the existance
+of side comments.  The new version handles this correctly:
+
+```
+            @new_in_dir = (
+                grep { not $seen{$_} }          # files not yet processed
+                map  { $dir . "/" . $_ }        # map from file to dir/file
+                grep { not ignore_file($_) }    # ignore files in cvsignore
+                grep { not $skip{$_} }          # skip files to be ignored
+                  readdir(D)
+            );
+```
diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm

index d587b10242b57fcab053dd34d0fddd663bf6b2b7..42a56494c9952d5024ea4427a6a50fb465b68393 100644 (file)
--- a/lib/Perl/Tidy/Formatter.pm
+++ b/lib/Perl/Tidy/Formatter.pm
@@ -6018,8 +6018,8 @@ EOM
      # Verify that the line hash does not have any unknown keys.
      $self->check_line_hashes() if (DEVEL_MODE);
  
-    # Experimental new ci calculation
-    $self->set_ci();
+    # calling set_ci before respace is possible, but type counts are not ready
+    ## $self->set_ci();
  
      {
          # Make a pass through all tokens, adding or deleting any whitespace as
@@ -6033,6 +6033,9 @@ EOM
              return 1;
          }
  
+        # calling set_ci after respace allows it to use type counts
+        $self->set_ci();
+
          $self->find_multiline_qw($rqw_lines);
      }
  
@@ -6725,6 +6728,10 @@ sub set_ci {
      my $Klimit = $self->[_Klimit_];
      return unless defined($Klimit);
  
+    # Set a flag which tells if sub respace_tokens has been called yet. This
+    # allows this sub to work either before or after respace_tokens is called.
+    my $respace_tokens_called = defined( $rLL->[0]->[_KNEXT_SEQ_ITEM_] );
+
      my $token   = ';';
      my $type    = ';';
      my $ci_next = 0;
@@ -6764,6 +6771,7 @@ sub set_ci {
      my $K_opening_ternary    = $self->[_K_opening_ternary_];
      my $K_closing_ternary    = $self->[_K_closing_ternary_];
      my $rlines               = $self->[_rlines_];
+    my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
  
      my $want_break_before_comma = $want_break_before{','};
  
@@ -6777,12 +6785,15 @@ sub set_ci {
          my $Kcn = $self->K_next_code($Kc);
          return unless defined($Kcn);
          my $seqno_n = $rLL->[$Kcn]->[_TYPE_SEQUENCE_];
-        return if ( defined($seqno_n) );
+
+        #return if ( defined($seqno_n) );
+        return if ($seqno_n);
          my $Knn = $self->K_next_code($Kcn);
          return unless defined($Knn);
          my $seqno_nn = $rLL->[$Knn]->[_TYPE_SEQUENCE_];
-        return unless defined($seqno_nn);
-        return unless $K_opening_container->{$seqno_nn} == $Knn;
+        return unless ($seqno_nn);
+        my $K_nno = $K_opening_container->{$seqno_nn};
+        return unless $K_nno && $K_nno == $Knn;
          my $block_type = $rblock_type_of_seqno->{$seqno_nn};
  
          if ($block_type) {
@@ -6928,9 +6939,22 @@ sub set_ci {
                  if ( $token eq '(' ) {
  
                      # 'foreach' and 'for' paren contents are treated as logical
+                    #  except for C-style 'for'
                      if ( $last_type eq 'k' ) {
-                        $is_logical ||=
-                          $last_token eq 'for' || $last_token eq 'foreach';
+                        $is_logical ||= $last_token eq 'foreach';
+
+                        if ( $last_token eq 'for' ) {
+                            my $rtype_count = $rtype_count_by_seqno->{$seqno};
+                            if (   $rtype_count
+                                && $rtype_count->{'f'} )
+                            {
+                                # C-style 'for' container will be type 'List'
+                                $is_logical = 0;
+                            }
+                            else {
+                                $is_logical = 1;
+                            }
+                        }
                      }
  
                      # Check for 'for' and 'foreach' loops with iterators
@@ -6999,12 +7023,33 @@ sub set_ci {
  
                      if ( !$no_semicolon ) {
  
-                        # Fix for block types sort/map/etc which use zero ci
-                        # at terminal brace if previous keyword had zero ci.
-                        # This will cause sort/map/grep filter blocks to line
-                        # up.
+                        # Optional fix for block types sort/map/etc which use
+                        # zero ci at terminal brace if previous keyword had
+                        # zero ci.  This will cause sort/map/grep filter blocks
+                        # to line up. Note that sub 'undo_ci' will also try to
+                        # do this, so this is not a critical operation.
                          if ( $is_block_with_ci{$block_type} ) {
-                            if ( $map_block_follows->($seqno) ) {
+                            my $parent_seqno = $rparent->{_seqno};
+                            my $rtype_count =
+                              $rtype_count_by_seqno->{$parent_seqno};
+                            if (
+
+                                # only do this within containers
+                                $parent_seqno != SEQ_ROOT
+
+                                # Only do this if sub respace_tokens has
+                                # been called, so that counts are available
+                                && $respace_tokens_called
+                                && (
+
+                                    # only in containers without ',' and ';'
+                                    # TODO: could subtract 1 a trailing ';'
+                                    !$rtype_count || ( !$rtype_count->{','}
+                                        && !$rtype_count->{';'} )
+                                )
+                                && $map_block_follows->($seqno)
+                              )
+                            {
                                  if ($ci_last) {
                                      $ci_close = $ci_this;
                                  }
@@ -7346,6 +7391,16 @@ sub set_ci {
              $rparent->{_comma_count}++;
          }
  
+        # Treat hanging side comments like blanks
+        elsif ( $type eq 'q' && $token eq EMPTY_STRING ) {
+            $ci_next = $ci_this;
+
+            $rtoken_K->[_CI_LEVEL_] = $ci_this;
+
+            # 'next' to avoid saving last_ values for blanks and commas
+            next;
+        }
+
          #-----------------------------------------------------------------
          # Development test: ci_this should match the ci from the tokenizer
          # except where the new value makes an improvement.
diff --git a/t/snippets/expect/c154.def b/t/snippets/expect/c154.def

index 238900d55e055fff2ac78502d57d27866a9bd86a..50d23d452d1254fd16cbca994bd239ffe82ef822 100644 (file)
--- a/t/snippets/expect/c154.def
+++ b/t/snippets/expect/c154.def
@@ -18,7 +18,7 @@
                      $q = 201 ;
                      print '-' x 79, "\n" ;
                      $g = (
-                          $f ^ ( $w = ( $z = $m . $e ) ^ substr $e, $q )
+                        $f ^ ( $w = ( $z = $m . $e ) ^ substr $e, $q )
                            ^ ( $n = $b ^ $d | $a ^ $l )
                      ) & ( $w | $z ^ $f ^ $n ) & ( $l | $g )
                    )
diff --git a/t/snippets26.t b/t/snippets26.t

index c0516fcf1128890cc665e6152d0606f2a1ae74d3..e4351228aee47f5280c30daaabb736abaf19ed9d 100644 (file)
--- a/t/snippets26.t
+++ b/t/snippets26.t
@@ -791,7 +791,7 @@ abcdefghijklmnopq
                      $q = 201 ;
                      print '-' x 79, "\n" ;
                      $g = (
-                          $f ^ ( $w = ( $z = $m . $e ) ^ substr $e, $q )
+                        $f ^ ( $w = ( $z = $m . $e ) ^ substr $e, $q )
                            ^ ( $n = $b ^ $d | $a ^ $l )
                      ) & ( $w | $z ^ $f ^ $n ) & ( $l | $g )
                    )
author	Steve Hancock <perltidy@users.sourceforge.net>
	Mon, 29 May 2023 13:14:27 +0000 (06:14 -0700)
committer	Steve Hancock <perltidy@users.sourceforge.net>
	Mon, 29 May 2023 13:14:27 +0000 (06:14 -0700)
docs/ci_update.md		patch \| blob \| history
lib/Perl/Tidy/Formatter.pm		patch \| blob \| history
t/snippets/expect/c154.def		patch \| blob \| history
t/snippets26.t		patch \| blob \| history