cleanups and optimizations

author Steve Hancock <perltidy@users.sourceforge.net>

Sun, 11 Sep 2022 22:02:18 +0000 (15:02 -0700)

committer Steve Hancock <perltidy@users.sourceforge.net>

Sun, 11 Sep 2022 22:02:18 +0000 (15:02 -0700)
author Steve Hancock <perltidy@users.sourceforge.net>
Sun, 11 Sep 2022 22:02:18 +0000 (15:02 -0700)
committer Steve Hancock <perltidy@users.sourceforge.net>
Sun, 11 Sep 2022 22:02:18 +0000 (15:02 -0700)
diff --git a/CHANGES.md b/CHANGES.md

index 808768e284c9a8262139679905316c1df7a29617..e444ac10b8e1f04816ba7f95d798ae7e0ed10221 100644 (file)
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -80,7 +80,7 @@
              my ($curr) = current();
              err(@_);
  
-    - This version runs 5 to 15 percent faster than the previous
+    - This version runs about 10 to 15 percent faster than the previous
        release on large files, depending on formatting parameters.
  
  ## 2022 06 13
diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm

index ff0da29f90c939165f79220bbdbad6ca0827dbab..6a96886704ecb4bc1a981e109660cc9b15817730 100644 (file)
--- a/lib/Perl/Tidy/Formatter.pm
+++ b/lib/Perl/Tidy/Formatter.pm
@@ -230,6 +230,7 @@ my (
  
      # Static hashes initialized in a BEGIN block
      %is_assignment,
+    %is_non_list_type,
      %is_if_unless_and_or_last_next_redo_return,
      %is_if_elsif_else_unless_while_until_for_foreach,
      %is_if_unless_while_until_for_foreach,
@@ -598,6 +599,10 @@ BEGIN {
      );
      @is_assignment{@q} = (1) x scalar(@q);
  
+    # a hash needed by break_lists for efficiency:
+    push @q, qw{ ; < > ~ f };
+    @is_non_list_type{@q} = (1) x scalar(@q);
+
      @q = qw(is if unless and or err last next redo return);
      @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
  
@@ -4044,6 +4049,11 @@ EOM
  
          my ($self) = @_;
  
+        #-----------------------------------------------------------------
+        # Define a 'bond strength' for each token pair in an output batch.
+        # See comments above for definition of bond strength.
+        #-----------------------------------------------------------------
+
          my $rbond_strength_to_go = [];
  
          my $rLL               = $self->[_rLL_];
@@ -5390,7 +5400,14 @@ EOM
      sub write_line_inner_loop {
          my ( $self, $line_of_tokens_old, $line_of_tokens ) = @_;
  
-        # Copy the tokens for this line to their new storage location
+        #---------------------------------------------------------------------
+        # Copy the tokens on one line received from the tokenizer to their new
+        # storage locations.
+        #---------------------------------------------------------------------
+
+        # Input parameters:
+        #  $line_of_tokens_old = line received from tokenizer
+        #  $line_of_tokens     = line of tokens being formed for formatter
  
          my $rtokens = $line_of_tokens_old->{_rtokens};
          my $jmax    = @{$rtokens} - 1;
@@ -5752,7 +5769,7 @@ sub set_CODE_type {
              # A line which is entirely a quote or pattern must go out
              # verbatim.  Note: the \n is contained in $input_line.
              if ( $jmax <= 0 ) {
-                if ( ( $input_line =~ "\t" ) ) {
+                if ( $self->[_save_logfile_] && $input_line =~ /\t/ ) {
                      my $input_line_number = $line_of_tokens->{_line_number};
                      $self->note_embedded_tab($input_line_number);
                  }
@@ -6302,18 +6319,20 @@ sub respace_tokens {
  
      my $self = shift;
  
-    # return parameters
+    #--------------------------------------------------------------------------
+    # This routine is called once per file to do as much formatting as possible
+    # before new line breaks are set.
+    #--------------------------------------------------------------------------
+
+    # Return parameters:
+    # Set $severe_error=true if processing must terminate immediately
      my ( $severe_error, $rqw_lines );
  
+    # We change any spaces in --indent-only mode
      if ( $rOpts->{'indent-only'} ) {
          return ( $severe_error, $rqw_lines );
      }
  
-    # This routine is called once per file to do as much formatting as possible
-    # before new line breaks are set.
-
-    # Set $severe_error=true if processing must terminate immediately
-
      # This routine makes all necessary and possible changes to the tokenization
      # after the initial tokenization of the file. This is a tedious routine,
      # but basically it consists of inserting and deleting whitespace between
@@ -6499,7 +6518,7 @@ sub respace_tokens {
                  # The level and ci_level of newly created spaces should be the
                  # same as the previous token. Otherwise blinking states can
                  # be created if the -lp mode is used. See similar coding in
-                # sub 'store_token_and_space'.  Fixes cases b1109 b1110.
+                # sub 'store_space_and_token'.  Fixes cases b1109 b1110.
                  $rcopy->[_LEVEL_] =
                    $rLL_new->[-1]->[_LEVEL_];
                  $rcopy->[_CI_LEVEL_] =
@@ -6540,9 +6559,10 @@ sub respace_tokens_inner_loop {
  
      my ( $self, $Kfirst, $Klast, $input_line_number ) = @_;
  
-    #-------------------------------------------------------
-    # Loop to copy all tokens on this line, with any changes
-    #-------------------------------------------------------
+    #-----------------------------------------------------------------
+    # Loop to copy all tokens on one line, making any spacing changes,
+    # while also collecting information needed by later subs.
+    #-----------------------------------------------------------------
      my $type_sequence;
      my $rtoken_vars;
      foreach my $KK ( $Kfirst .. $Klast ) {
@@ -6797,10 +6817,15 @@ EOM
              # this)
              $token =~ s/\s*$//;
              $rtoken_vars->[_TOKEN_] = $token;
-            $self->note_embedded_tab($input_line_number)
-              if ( $token =~ "\t" );
-            $self->store_token_and_space( $rtoken_vars,
-                $rwhitespace_flags->[$KK] == WS_YES );
+            if ( $self->[_save_logfile_] && $token =~ /\t/ ) {
+                $self->note_embedded_tab($input_line_number);
+            }
+            if ( $rwhitespace_flags->[$KK] == WS_YES ) {
+                $self->store_space_and_token($rtoken_vars);
+            }
+            else {
+                $self->store_token($rtoken_vars);
+            }
              next;
          } ## end if ( $type eq 'q' )
  
@@ -6823,12 +6848,13 @@ EOM
  
          # check a quote for problems
          elsif ( $type eq 'Q' ) {
-            $self->check_Q( $KK, $Kfirst, $input_line_number );
+            $self->check_Q( $KK, $Kfirst, $input_line_number )
+              if ( $self->[_save_logfile_] );
          }
  
          # Store this token with possible previous blank
          if ( $rwhitespace_flags->[$KK] == WS_YES ) {
-            $self->store_token_and_space( $rtoken_vars, 1 );
+            $self->store_space_and_token($rtoken_vars);
          }
          else {
              $self->store_token($rtoken_vars);
@@ -7066,14 +7092,20 @@ sub set_permanently_broken {
  } ## end sub set_permanently_broken
  
  sub store_token {
+
      my ( $self, $item ) = @_;
  
+    #------------------------------------------
+    # Store one token during respace operations
+    #------------------------------------------
+
+    # Input parameter:
+    #  $item = ref to a token
+
      # This will be the index of this item in the new array
      my $KK_new = @{$rLL_new};
  
-    #------------------------------------------------------------------
-    # NOTE: called once per token so coding efficiency is critical here
-    #------------------------------------------------------------------
+    # NOTE: this sub is called once per token so coding efficiency is critical.
  
      # The next multiple assignment statements are significantly faster than
      # doing them one-by-one.
@@ -7299,14 +7331,13 @@ sub store_token {
      return;
  } ## end sub store_token
  
-sub store_token_and_space {
-    my ( $self, $item, $want_space ) = @_;
+sub store_space_and_token {
+    my ( $self, $item ) = @_;
  
      # store a token with preceding space if requested and needed
  
      # First store the space
-    if (   $want_space
-        && @{$rLL_new}
+    if (   @{$rLL_new}
          && $rLL_new->[-1]->[_TYPE_] ne 'b'
          && $rOpts_add_whitespace )
      {
@@ -7333,7 +7364,7 @@ sub store_token_and_space {
      # then the token
      $self->store_token($item);
      return;
-} ## end sub store_token_and_space
+} ## end sub store_space_and_token
  
  sub add_phantom_semicolon {
  
@@ -7485,10 +7516,14 @@ sub add_phantom_semicolon {
  
  sub check_Q {
  
-    # Check that a quote looks okay
+    # Check that a quote looks okay, and report possible problems
+    # to the logfile.
+
      my ( $self, $KK, $Kfirst, $line_number ) = @_;
      my $token = $rLL->[$KK]->[_TOKEN_];
-    $self->note_embedded_tab($line_number) if ( $token =~ "\t" );
+    if ( $token =~ /\t/ ) {
+        $self->note_embedded_tab($line_number);
+    }
  
      # The remainder of this routine looks for something like
      #        '$var = s/xxx/yyy/;'
@@ -9659,7 +9694,7 @@ sub weld_nested_quotes {
              my $next_type  = $rLL->[$Kn]->[_TYPE_];
              next
                unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
-                && $next_token =~ /^q/ );
+                && substr( $next_token, 0, 1 ) eq 'q' );
  
              # The token before the closing container must also be a quote
              my $Kouter_closing = $K_closing_container->{$outer_seqno};
@@ -10049,7 +10084,7 @@ sub clip_adjusted_levels {
      my ($self) = @_;
      my $radjusted_levels = $self->[_radjusted_levels_];
      return unless defined($radjusted_levels) && @{$radjusted_levels};
-    my $min = min( @{$radjusted_levels} );   # fast check for min
+    my $min = min( @{$radjusted_levels} );    # fast check for min
      if ( $min < 0 ) {
  
          # slow loop, but rarely needed
@@ -12526,14 +12561,17 @@ EOM
  
          my ( $self, $Ktoken_vars, $rtoken_vars ) = @_;
  
-        # Add one token to the next batch.
+        #-------------------------------------------------------
+        # Token storage utility for sub process_line_of_CODE.
+        # Add one token to the next batch of '_to_go' variables.
+        #-------------------------------------------------------
+
+        # Input parameters:
          #   $Ktoken_vars = the index K in the global token array
          #   $rtoken_vars = $rLL->[$Ktoken_vars] = the corresponding token values
          #                  unless they are temporarily being overridden
  
-        #------------------------------------------------------------------
          # NOTE: called once per token so coding efficiency is critical here
-        #------------------------------------------------------------------
  
          my (
  
@@ -12627,7 +12665,7 @@ EOM
            $summed_lengths_to_go[$max_index_to_go] + $length;
  
          # Initializations for first token of new batch
-        if ( $max_index_to_go == 0 ) {
+        if ( !$max_index_to_go ) {
  
              # Reset flag '$starting_in_quote' for a new batch.  It must be set
              # to the value of '$in_continued_quote', but here for efficiency we
@@ -13214,6 +13252,10 @@ EOM
  
          my ( $self, $has_side_comment ) = @_;
  
+        #--------------------------------------------------------------------
+        # Loop to move all tokens from an input line to a newly forming batch
+        #--------------------------------------------------------------------
+
          # We do not want a leading blank if the previous batch just got output
  
          if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
@@ -14706,6 +14748,11 @@ EOM
  
          my ($self) = @_;
  
+        #-----------------------------------------------------------------
+        # This sub directs the formatting of one complete batch of tokens.
+        # The tokens of the batch are in the '_to_go' arrays.
+        #-----------------------------------------------------------------
+
          my $this_batch = $self->[_this_batch_];
          $batch_count++;
  
@@ -14747,7 +14794,7 @@ EOM
          # Shortcut for block comments
          # Note that this shortcut does not work for -lp yet
          #--------------------------------------------------
-        elsif ( $max_index_to_go == 0 && $types_to_go[0] eq '#' ) {
+        elsif ( !$max_index_to_go && $types_to_go[0] eq '#' ) {
              my $ibeg = 0;
              $this_batch->[_ri_first_]                 = [$ibeg];
              $this_batch->[_ri_last_]                  = [$ibeg];
@@ -15081,21 +15128,22 @@ EOM
          my $called_pad_array_to_go;
  
          # set all forced breakpoints for good list formatting
-        my $is_long_line = $max_index_to_go > 0
-          && $self->excess_line_length( $imin, $max_index_to_go ) > 0;
-
-        my $old_line_count_in_batch = 1;
+        my $is_long_line;
+        my $multiple_old_lines_in_batch;
          if ( $max_index_to_go > 0 ) {
+            $is_long_line =
+              $self->excess_line_length( $imin, $max_index_to_go ) > 0;
+
              my $Kbeg = $K_to_go[0];
              my $Kend = $K_to_go[$max_index_to_go];
-            $old_line_count_in_batch +=
+            $multiple_old_lines_in_batch =
                $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_];
          }
  
          my $rbond_strength_bias = [];
          if (
                 $is_long_line
-            || $old_line_count_in_batch > 1
+            || $multiple_old_lines_in_batch
  
              # must always call break_lists() with unbalanced batches because
              # it is maintaining some stacks
@@ -15168,7 +15216,8 @@ EOM
  
              $self->break_all_chain_tokens( $ri_first, $ri_last );
  
-            $self->break_equals( $ri_first, $ri_last );
+            $self->break_equals( $ri_first, $ri_last )
+              if @{$ri_first} >= 3;
  
              # now we do a correction step to clean this up a bit
              # (The only time we would not do this is for debugging)
@@ -17894,6 +17943,14 @@ sub break_long_lines {
      # maximum line length.
      #-----------------------------------------------------------
  
+    my ( $self, $saw_good_break, $rcolon_list, $rbond_strength_bias ) = @_;
+
+    # Input parameters:
+    #  $saw_good_break - a flag set by break_lists
+    #  $rcolon_list    - ref to a list of all the ? and : tokens in the batch,
+    #    in order.
+    #  $rbond_strength_bias - small bond strength bias values set by break_lists
+
      # Output: returns references to the arrays:
      #  @i_first
      #  @i_last
@@ -17905,11 +17962,6 @@ sub break_long_lines {
      # may be updated to be =1 for any index $i after which there must be
      # a break.  This signals later routines not to undo the breakpoint.
  
-    my ( $self, $saw_good_break, $rcolon_list, $rbond_strength_bias ) = @_;
-
-    # @{$rcolon_list} is a list of all the ? and : tokens in the batch, in
-    # order.
-
      # Method:
      # This routine is called if a statement is longer than the maximum line
      # length, or if a preliminary scanning located desirable break points.
@@ -18125,6 +18177,19 @@ sub break_lines_inner_loop {
      # which, if possible, does not exceed the maximum line length.
      #-----------------------------------------------------------------
  
+    my (
+        $self,    #
+
+        $i_begin,
+        $i_last_break,
+        $imax,
+        $last_break_strength,
+        $line_count,
+        $rbond_strength_to_go,
+        $saw_good_break,
+
+    ) = @_;
+
      # Given:
      #   $i_begin               = first index of range
      #   $i_last_break          = index of previous break
@@ -18140,19 +18205,6 @@ sub break_lines_inner_loop {
      #   $leading_alignment_type = special token type after break
      #   $Msg                    = string of debug info
  
-    my (
-        $self,    #
-
-        $i_begin,
-        $i_last_break,
-        $imax,
-        $last_break_strength,
-        $line_count,
-        $rbond_strength_to_go,
-        $saw_good_break,
-
-    ) = @_;
-
      my $Msg                    = EMPTY_STRING;
      my $strength               = NO_BREAK;
      my $i_test                 = $i_begin - 1;
@@ -19339,13 +19391,12 @@ EOM
                  $last_dot_index[$depth] = $i;
              }
  
-            # Turn off alignment if we are sure that this is not a list
+            # Turn off comma alignment if we are sure that this is not a list
              # environment.  To be safe, we will do this if we see certain
-            # non-list tokens, such as ';', and also the environment is
-            # not a list.  Note that '=' could be in any of the = operators
-            # (lextest.t). We can't just use the reported environment
-            # because it can be incorrect in some cases.
-            elsif ( ( $type =~ /^[\;\<\>\~f]$/ || $is_assignment{$type} )
+            # non-list tokens, such as ';', '=', and also the environment is
+            # not a list.
+            ##      $type =~ /^[\;\<\>\~f]$/ || $is_assignment{$type}
+            elsif ( $is_non_list_type{$type}
                  && !$self->is_in_list_by_i($i) )
              {
                  $dont_align[$depth]         = 1;
@@ -21725,13 +21776,13 @@ sub get_available_spaces_to_go {
  
      sub set_lp_indentation {
  
+        my ($self) = @_;
+
          #------------------------------------------------------------------
          # Define the leading whitespace for all tokens in the current batch
          # when the -lp formatting is selected.
          #------------------------------------------------------------------
  
-        my ($self) = @_;
-
          return unless ($rOpts_line_up_parentheses);
          return unless ( defined($max_index_to_go) && $max_index_to_go >= 0 );
  
@@ -22841,9 +22892,10 @@ sub convey_batch_to_vertical_aligner {
      # have been defined. Here we prepare the lines for passing to the vertical
      # aligner.  We do the following tasks:
      # - mark certain vertical alignment tokens, such as '=', in each line
-    # - make minor indentation adjustments
+    # - make final indentation adjustments
      # - do logical padding: insert extra blank spaces to help display certain
      #   logical constructions
+    # - send the line to the vertical aligner
  
      my $this_batch = $self->[_this_batch_];
      my $ri_first   = $this_batch->[_ri_first_];
@@ -23012,11 +23064,28 @@ EOM
          # --------------------------------------
          # get the final indentation of this line
          # --------------------------------------
-        my ( $indentation, $lev, $level_end, $i_terminal, $is_outdented_line )
-          = $self->get_final_indentation( $ibeg, $iend, $rfields,
-            $rpatterns,         $ri_first, $ri_last,
-            $rindentation_list, $ljump,    $starting_in_quote,
-            $is_static_block_comment );
+        my (
+
+            $indentation,
+            $lev,
+            $level_end,
+            $i_terminal,
+            $is_outdented_line,
+
+        ) = $self->get_final_indentation(
+
+            $ibeg,
+            $iend,
+            $rfields,
+            $rpatterns,
+            $ri_first,
+            $ri_last,
+            $rindentation_list,
+            $ljump,
+            $starting_in_quote,
+            $is_static_block_comment,
+
+        );
  
          # --------------------------------
          # define flag 'outdent_long_lines'
@@ -23399,16 +23468,21 @@ EOM
  
      sub set_vertical_alignment_markers {
  
-        # This routine takes the first step toward vertical alignment of the
-        # lines of output text.  It looks for certain tokens which can serve as
-        # vertical alignment markers (such as an '=').
-        #
+        my ( $self, $ri_first, $ri_last ) = @_;
+
+        #----------------------------------------------------------------------
+        # This routine looks at output lines for certain tokens which can serve
+        # as vertical alignment markers (such as an '=').
+        #----------------------------------------------------------------------
+
+        # Input parameters:
+        #   $ri_first = ref to list of starting line indexes in _to_go arrays
+        #   $ri_last  = ref to list of ending line indexes in _to_go arrays
+
          # Method: We look at each token $i in this output batch and set
          # $ralignment_type_to_go->[$i] equal to those tokens at which we would
          # accept vertical alignment.
  
-        my ( $self, $ri_first, $ri_last ) = @_;
-
          my $ralignment_type_to_go;
          my $ralignment_counts       = [];
          my $ralignment_hash_by_line = [];
@@ -24823,11 +24897,26 @@ sub xlp_tweak {
  
      sub make_alignment_patterns {
  
-        # Here we do some important preliminary work for the
-        # vertical aligner.  We create four arrays for one
-        # output line. These arrays contain strings that can
-        # be tested by the vertical aligner to see if
-        # consecutive lines can be aligned vertically.
+        my ( $self, $ibeg, $iend, $ralignment_type_to_go, $alignment_count,
+            $ralignment_hash )
+          = @_;
+
+        #------------------------------------------------------------------
+        # This sub creates arrays of vertical alignment info for one output
+        # line.
+        #------------------------------------------------------------------
+
+        # Input parameters:
+        #  $ibeg, $iend - index range of this line in the _to_go arrays
+        #  $ralignment_type_to_go - alignment type of tokens, like '=', if any
+        #  $alignment_count - number of alignment tokens in the line
+        #  $ralignment_hash - this contains all of the alignments for this
+        #    line.  It is not yet used but is available for future coding in
+        #    case there is a need to do a preliminary scan of alignment tokens.
+
+        # The arrays which are created contain strings that can be tested by
+        # the vertical aligner to see if consecutive lines can be aligned
+        # vertically.
          #
          # The four arrays are indexed on the vertical
          # alignment fields and are:
@@ -24844,13 +24933,6 @@ sub xlp_tweak {
          #   allowed, even when the alignment tokens match.
          # @field_lengths - the display width of each field
  
-        my ( $self, $ibeg, $iend, $ralignment_type_to_go, $alignment_count,
-            $ralignment_hash )
-          = @_;
-
-        # The var $ralignment_hash contains all of the alignments for this
-        # line.  It is not yet used but is available for future coding in case
-        # there is a need to do a preliminary scan of the alignment tokens.
          if (DEVEL_MODE) {
              my $new_count = 0;
              if ( defined($ralignment_hash) ) {
@@ -25374,9 +25456,26 @@ sub make_paren_name {
  
      sub get_final_indentation {
  
-        #--------------------------------------------------------------------
-        # This routine sets the final indentation of a line in the Formatter.
-        #--------------------------------------------------------------------
+        my (
+            $self,    #
+
+            $ibeg,
+            $iend,
+            $rfields,
+            $rpatterns,
+            $ri_first,
+            $ri_last,
+            $rindentation_list,
+            $level_jump,
+            $starting_in_quote,
+            $is_static_block_comment
+
+        ) = @_;
+
+        #--------------------------------------------------------------
+        # This routine makes any necessary adjustments to get the final
+        # indentation of a line in the Formatter.
+        #--------------------------------------------------------------
  
          # It starts with the basic indentation which has been defined for the
          # leading token, and then takes into account any options that the user
@@ -25399,15 +25498,6 @@ sub make_paren_name {
          #    undo_ci, which was processed earlier, so care has to be taken to
          #    keep them coordinated.
  
-        my (
-            $self,       $ibeg,
-            $iend,       $rfields,
-            $rpatterns,  $ri_first,
-            $ri_last,    $rindentation_list,
-            $level_jump, $starting_in_quote,
-            $is_static_block_comment
-        ) = @_;
-
          # Find the last code token of this line
          my $i_terminal    = $iend;
          my $terminal_type = $types_to_go[$iend];
@@ -25498,11 +25588,14 @@ sub make_paren_name {
  
              # This can be tedious so we let a sub do it
              (
-                $adjust_indentation,  $default_adjust_indentation,
-                $opening_indentation, $opening_offset,
-                $is_leading,          $opening_exists
-              )
-              = $self->get_closing_token_indentation(
+                $adjust_indentation,
+                $default_adjust_indentation,
+                $opening_indentation,
+                $opening_offset,
+                $is_leading,
+                $opening_exists
+
+            ) = $self->get_closing_token_indentation(
  
                  $ibeg,
                  $iend,
@@ -25514,7 +25607,7 @@ sub make_paren_name {
                  $is_semicolon_terminated,
                  $seqno_qw_closing,
  
-              );
+            );
          }
  
          #--------------------------------------------------------
@@ -25842,8 +25935,15 @@ sub make_paren_name {
              }
          }
  
-        return ( $indentation, $lev, $level_end, $i_terminal,
-            $is_outdented_line );
+        return (
+
+            $indentation,
+            $lev,
+            $level_end,
+            $i_terminal,
+            $is_outdented_line,
+
+        );
      } ## end sub get_final_indentation
  
      sub get_closing_token_indentation {
@@ -25852,7 +25952,7 @@ sub make_paren_name {
          # token - i.e. one of these:     ) ] } :
  
          my (
-            $self,
+            $self,    #
  
              $ibeg,
              $iend,
@@ -25951,8 +26051,6 @@ sub make_paren_name {
                = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first,
                  $ri_last, $rindentation_list, $seqno_qw_closing );
  
-            my $terminal_is_in_list = $self->is_in_list_by_i($i_terminal);
-
              # First set the default behavior:
              if (
  
@@ -26017,7 +26115,7 @@ sub make_paren_name {
  
                  # require LIST environment; otherwise, we may outdent too much -
                  # this can happen in calls without parentheses (overload.t);
-                && $terminal_is_in_list
+                && $self->is_in_list_by_i($i_terminal)
                )
              {
                  $adjust_indentation = 1;
@@ -26075,10 +26173,10 @@ sub make_paren_name {
                  # but right now we do not have that information.  For now
                  # we see if we are in a list, and this works well.
                  # See test files 'sub*.t' for good test cases.
-                if (   $terminal_is_in_list
-                    && !$rOpts_indent_closing_brace
+                if (  !$rOpts_indent_closing_brace
                      && $block_type_beg
-                    && $block_type_beg =~ /$ASUB_PATTERN/ )
+                    && $self->[_ris_asub_block_]->{$seqno_beg}
+                    && $self->is_in_list_by_i($i_terminal) )
                  {
                      (
                          $opening_indentation, $opening_offset,
@@ -26230,9 +26328,16 @@ sub make_paren_name {
              if ($is_leading) { $adjust_indentation = 2; }
          }
  
-        return ( $adjust_indentation, $default_adjust_indentation,
-            $opening_indentation, $opening_offset,
-            $is_leading,          $opening_exists );
+        return (
+
+            $adjust_indentation,
+            $default_adjust_indentation,
+            $opening_indentation,
+            $opening_offset,
+            $is_leading,
+            $opening_exists,
+
+        );
      }
  
  } ## end closure get_final_indentation
diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm

index 142bda03e374c670cbcdf081acf5c0f9688e8233..e97a06fd4d8bea447fd5107c5142853adc7a6975 100644 (file)
--- a/lib/Perl/Tidy/Tokenizer.pm
+++ b/lib/Perl/Tidy/Tokenizer.pm
@@ -781,6 +781,15 @@ sub get_input_line_number {
      return $tokenizer_self->[_last_line_number_];
  }
  
+sub write_logfile_numbered_msg {
+    my ($msg) = @_;
+
+    # write input line number + message to logfile
+    my $input_line_number = get_input_line_number();
+    write_logfile_entry("Line $input_line_number: $msg");
+    return;
+}
+
  # returns the next tokenized line
  sub get_line {
  
@@ -796,12 +805,6 @@ sub get_line {
  
      my $input_line_number = ++$tokenizer_self->[_last_line_number_];
  
-    my $write_logfile_entry = sub {
-        my ($msg) = @_;
-        write_logfile_entry("Line $input_line_number: $msg");
-        return;
-    };
-
      # Find and remove what characters terminate this line, including any
      # control r
      my $input_line_separator = EMPTY_STRING;
@@ -820,7 +823,7 @@ sub get_line {
      # for backwards compatibility we keep the line text terminated with
      # a newline character
      $input_line .= "\n";
-    $tokenizer_self->[_line_of_text_] = $input_line;    # update
+    $tokenizer_self->[_line_of_text_] = $input_line;
  
      # create a data structure describing this line which will be
      # returned to the caller.
@@ -860,6 +863,7 @@ sub get_line {
          _square_bracket_depth      => $square_bracket_depth,
          _paren_depth               => $paren_depth,
          _quote_character           => EMPTY_STRING,
+## Skip these needless initializations for efficiency:
  ##      _rtoken_type               => undef,
  ##      _rtokens                   => undef,
  ##      _rlevels                   => undef,
@@ -887,7 +891,8 @@ sub get_line {
          if ( $candidate_target eq $here_doc_target ) {
              $tokenizer_self->[_nearly_matched_here_target_at_] = undef;
              $line_of_tokens->{_line_type} = 'HERE_END';
-            $write_logfile_entry->("Exiting HERE document $here_doc_target\n");
+            write_logfile_numbered_msg(
+                "Exiting HERE document $here_doc_target\n");
  
              my $rhere_target_list = $tokenizer_self->[_rhere_target_list_];
              if ( @{$rhere_target_list} ) {  # there can be multiple here targets
@@ -896,7 +901,7 @@ sub get_line {
                  $tokenizer_self->[_here_doc_target_] = $here_doc_target;
                  $tokenizer_self->[_here_quote_character_] =
                    $here_quote_character;
-                $write_logfile_entry->(
+                write_logfile_numbered_msg(
                      "Entering HERE document $here_doc_target\n");
                  $tokenizer_self->[_nearly_matched_here_target_at_] = undef;
                  $tokenizer_self->[_started_looking_for_here_target_at_] =
@@ -932,7 +937,7 @@ sub get_line {
  
              # This is the end when count reaches 0
              if ( !$tokenizer_self->[_in_format_] ) {
-                $write_logfile_entry->("Exiting format section\n");
+                write_logfile_numbered_msg("Exiting format section\n");
                  $line_of_tokens->{_line_type} = 'FORMAT_END';
              }
          }
@@ -954,7 +959,7 @@ sub get_line {
          $line_of_tokens->{_line_type} = 'POD';
          if ( $input_line =~ /^=cut/ ) {
              $line_of_tokens->{_line_type} = 'POD_END';
-            $write_logfile_entry->("Exiting POD section\n");
+            write_logfile_numbered_msg("Exiting POD section\n");
              $tokenizer_self->[_in_pod_] = 0;
          }
          if ( $input_line =~ /^\#\!.*perl\b/ && !$tokenizer_self->[_in_end_] ) {
@@ -972,7 +977,7 @@ sub get_line {
          $line_of_tokens->{_line_type} = 'SKIP';
          if ( $input_line =~ /$code_skipping_pattern_end/ ) {
              $line_of_tokens->{_line_type} = 'SKIP_END';
-            $write_logfile_entry->("Exiting code-skipping section\n");
+            write_logfile_numbered_msg("Exiting code-skipping section\n");
              $tokenizer_self->[_in_skipped_] = 0;
          }
          return $line_of_tokens;
@@ -996,7 +1001,7 @@ sub get_line {
          # end of a pod section
          if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) {
              $line_of_tokens->{_line_type} = 'POD_START';
-            $write_logfile_entry->("Entering POD section\n");
+            write_logfile_numbered_msg("Entering POD section\n");
              $tokenizer_self->[_in_pod_] = 1;
              return $line_of_tokens;
          }
@@ -1015,7 +1020,7 @@ sub get_line {
          # end of a pod section
          if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) {
              $line_of_tokens->{_line_type} = 'POD_START';
-            $write_logfile_entry->("Entering POD section\n");
+            write_logfile_numbered_msg("Entering POD section\n");
              $tokenizer_self->[_in_pod_] = 1;
              return $line_of_tokens;
          }
@@ -1147,13 +1152,13 @@ sub get_line {
                  warning(
  "=cut starts a pod section .. this can fool pod utilities.\n"
                  ) unless (DEVEL_MODE);
-                $write_logfile_entry->("Entering POD section\n");
+                write_logfile_numbered_msg("Entering POD section\n");
              }
          }
  
          else {
              $line_of_tokens->{_line_type} = 'POD_START';
-            $write_logfile_entry->("Entering POD section\n");
+            write_logfile_numbered_msg("Entering POD section\n");
          }
  
          return $line_of_tokens;
@@ -1163,7 +1168,7 @@ sub get_line {
      if ( $tokenizer_self->[_in_skipped_] ) {
  
          $line_of_tokens->{_line_type} = 'SKIP';
-        $write_logfile_entry->("Entering code-skipping section\n");
+        write_logfile_numbered_msg("Entering code-skipping section\n");
          return $line_of_tokens;
      }
  
@@ -1176,7 +1181,7 @@ sub get_line {
          $tokenizer_self->[_in_here_doc_]          = 1;
          $tokenizer_self->[_here_doc_target_]      = $here_doc_target;
          $tokenizer_self->[_here_quote_character_] = $here_quote_character;
-        $write_logfile_entry->("Entering HERE document $here_doc_target\n");
+        write_logfile_numbered_msg("Entering HERE document $here_doc_target\n");
          $tokenizer_self->[_started_looking_for_here_target_at_] =
            $input_line_number;
      }
@@ -1186,13 +1191,13 @@ sub get_line {
      # which are not tokenized (and cannot be read with <DATA> either!).
      if ( $tokenizer_self->[_in_data_] ) {
          $line_of_tokens->{_line_type} = 'DATA_START';
-        $write_logfile_entry->("Starting __DATA__ section\n");
+        write_logfile_numbered_msg("Starting __DATA__ section\n");
          $tokenizer_self->[_saw_data_] = 1;
  
          # keep parsing after __DATA__ if use SelfLoader was seen
          if ( $tokenizer_self->[_saw_selfloader_] ) {
              $tokenizer_self->[_in_data_] = 0;
-            $write_logfile_entry->(
+            write_logfile_numbered_msg(
                  "SelfLoader seen, continuing; -nlsl deactivates\n");
          }
  
@@ -1201,13 +1206,13 @@ sub get_line {
  
      elsif ( $tokenizer_self->[_in_end_] ) {
          $line_of_tokens->{_line_type} = 'END_START';
-        $write_logfile_entry->("Starting __END__ section\n");
+        write_logfile_numbered_msg("Starting __END__ section\n");
          $tokenizer_self->[_saw_end_] = 1;
  
          # keep parsing after __END__ if use AutoLoader was seen
          if ( $tokenizer_self->[_saw_autoloader_] ) {
              $tokenizer_self->[_in_end_] = 0;
-            $write_logfile_entry->(
+            write_logfile_numbered_msg(
                  "AutoLoader seen, continuing; -nlal deactivates\n");
          }
          return $line_of_tokens;
@@ -1232,7 +1237,7 @@ sub get_line {
      # Note: if keyword 'format' occurs in this line code, it is still CODE
      # (keyword 'format' need not start a line)
      if ( $tokenizer_self->[_in_format_] ) {
-        $write_logfile_entry->("Entering format section\n");
+        write_logfile_numbered_msg("Entering format section\n");
      }
  
      if ( $tokenizer_self->[_in_quote_]
@@ -1244,7 +1249,7 @@ sub get_line {
              /^\s*$/ )
          {
              $tokenizer_self->[_line_start_quote_] = $input_line_number;
-            $write_logfile_entry->(
+            write_logfile_numbered_msg(
                  "Start multi-line quote or pattern ending in $quote_target\n");
          }
      }
@@ -1252,7 +1257,7 @@ sub get_line {
          && !$tokenizer_self->[_in_quote_] )
      {
          $tokenizer_self->[_line_start_quote_] = -1;
-        $write_logfile_entry->("End of multi-line quote or pattern\n");
+        write_logfile_numbered_msg("End of multi-line quote or pattern\n");
      }
  
      # we are returning a line of CODE
@@ -4534,8 +4539,16 @@ EOM
      } ## end sub tokenize_this_line
  
      sub tokenizer_main_loop {
+
          my ($is_END_or_DATA) = @_;
  
+        #---------------------------------
+        # Break one input line into tokens
+        #---------------------------------
+
+        # Input parameter:
+        #   $is_END_or_DATA is true for a __END__ or __DATA__ line
+
          # start by breaking the line into pre-tokens
          my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
          ( $rtokens, $rtoken_map, $rtoken_type ) =
@@ -4561,9 +4574,9 @@ EOM
          $i     = -1;
          $i_tok = -1;
  
-        # ------------------------------------------------------------
+        #-----------------------------
          # begin main tokenization loop
-        # ------------------------------------------------------------
+        #-----------------------------
  
          # we are looking at each pre-token of one line and combining them
          # into tokens
@@ -4897,10 +4910,14 @@ EOM
      sub tokenizer_wrapup_line {
          my ($line_of_tokens) = @_;
  
-        # We have broken the current line into tokens. Now we have to wrap up
-        # the result for shipping.  Most of the remaining work involves
-        # defining the two indentation parameters that the formatter needs
-        # (structural indentation level and continuation indentation).
+        #---------------------------------------------------------
+        # Package a line of tokens for shipping back to the caller
+        #---------------------------------------------------------
+
+        # Most of the remaining work involves defining the two indentation
+        # parameters that the formatter needs for each token:
+        # - $level    = structural indentation level and
+        # - $ci_level = continuation indentation level
  
          # The method for setting the indentation level is straightforward.
          # But the method used to define the continuation indentation is
@@ -9516,6 +9533,12 @@ sub write_on_underline {
  
  sub pre_tokenize {
  
+    my ( $str, $max_tokens_wanted ) = @_;
+
+    # Input parameter:
+    #  $max_tokens_wanted > 0  to stop on reaching this many tokens.
+    #                     = 0 means get all tokens
+
      # Break a string, $str, into a sequence of preliminary tokens.  We
      # are interested in these types of tokens:
      #   words       (type='w'),            example: 'max_tokens_wanted'
@@ -9529,9 +9552,8 @@ sub pre_tokenize {
      # An advantage of doing this pre-tokenization step is that it keeps almost
      # all of the regex work highly localized.  A disadvantage is that in some
      # very rare instances we will have to go back and split a pre-token.
-    my ( $str, $max_tokens_wanted ) = @_;
  
-    # we return references to these 3 arrays:
+    # Return parameters:
      my @tokens    = ();     # array of the tokens themselves
      my @token_map = (0);    # string position of start of each token
      my @type      = ();     # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
diff --git a/lib/Perl/Tidy/VerticalAligner.pm b/lib/Perl/Tidy/VerticalAligner.pm

index a56fa115bc3d9330fad9cfa139a21ba83badb287..b15fd84e622dbf059ea03d095a2957c5e1e382f9 100644 (file)
--- a/lib/Perl/Tidy/VerticalAligner.pm
+++ b/lib/Perl/Tidy/VerticalAligner.pm
@@ -530,7 +530,13 @@ BEGIN {
  
  sub valign_input {
  
-    # Place one line in the current vertical group.
+    #---------------------------------------------------------------------
+    # This is the front door of the vertical aligner.  On each call
+    # we receive one line of specially marked text for vertical alignment.
+    # We compare the line with the current group, and either:
+    # - the line joins the current group if alignments match, or
+    # - the current group is flushed and a new group is started otherwise
+    #---------------------------------------------------------------------
      #
      # The key input parameters describing each line are:
      #     $level          = indentation level of this line
@@ -1473,9 +1479,9 @@ EOM
          # Revert to the starting state if does not fit
          if ( $pad > $padding_available ) {
  
-            ################################################
+            #----------------------------------------------
              # Line does not fit -- revert to starting state
-            ################################################
+            #----------------------------------------------
              foreach my $alignment (@alignments) {
                  $alignment->restore_column();
              }
@@ -1487,9 +1493,9 @@ EOM
          $padding_available -= $pad;
      }
  
-    ######################################
+    #-------------------------------------
      # The line fits, the match is accepted
-    ######################################
+    #-------------------------------------
      return 1;
  
  }
@@ -1645,18 +1651,18 @@ sub _flush_group_lines {
  "APPEND0: _flush_group_lines called from $a $b $c lines=$nlines, type=$group_type \n";
      };
  
-    ############################################
+    #-------------------------------------------
      # Section 1: Handle a group of COMMENT lines
-    ############################################
+    #-------------------------------------------
      if ( $group_type eq 'COMMENT' ) {
          $self->_flush_comment_lines();
          return;
      }
  
-    #########################################################################
+    #------------------------------------------------------------------------
      # Section 2: Handle line(s) of CODE.  Most of the actual work of vertical
      # aligning happens here in the following steps:
-    #########################################################################
+    #------------------------------------------------------------------------
  
      # STEP 1: Remove most unmatched tokens. They block good alignments.
      my ( $max_lev_diff, $saw_side_comment ) =
@@ -2075,9 +2081,9 @@ sub sweep_left_to_right {
      my $ng_max = @{$rgroups} - 1;
      return unless ( $ng_max > 0 );
  
-    ############################################################################
+    #---------------------------------------------------------------------
      # Step 1: Loop over groups to find all common leading alignment tokens
-    ############################################################################
+    #---------------------------------------------------------------------
  
      my $line;
      my $rtokens;
@@ -2192,9 +2198,9 @@ sub sweep_left_to_right {
      }
      return unless @icommon;
  
-    ###########################################################
+    #----------------------------------------------------------
      # Step 2: Reorder and consolidate the list into a task list
-    ###########################################################
+    #----------------------------------------------------------
  
      # We have to work first from lowest token index to highest, then by group,
      # sort our list first on token index then group number
@@ -2220,9 +2226,9 @@ sub sweep_left_to_right {
          push @todo, [ $i, $ng_beg, $ng_end, $raw_tok, $lev ];
      }
  
-    ###############################
+    #------------------------------
      # Step 3: Execute the task list
-    ###############################
+    #------------------------------
      do_left_to_right_sweep( $rlines, $rgroups, \@todo, \%max_move, $short_pad,
          $group_level );
      return;
@@ -3275,18 +3281,18 @@ sub match_line_pairs {
  
              # find number of leading common tokens
  
-            #################################
+            #---------------------------------
              # No match to hanging side comment
-            #################################
+            #---------------------------------
              if ( $line->{'is_hanging_side_comment'} ) {
  
                  # Should not get here; HSC's have been filtered out
                  $imax_align = -1;
              }
  
-            ##############################
+            #-----------------------------
              # Handle comma-separated lists
-            ##############################
+            #-----------------------------
              elsif ( $list_type && $list_type eq $list_type_m ) {
  
                  # do not align lists across a ci jump with new list method
@@ -3305,9 +3311,9 @@ sub match_line_pairs {
                  $imax_align = $i_nomatch - 1;
              }
  
-            ##################
+            #-----------------
              # Handle non-lists
-            ##################
+            #-----------------
              else {
                  my $i_nomatch = $imax_min + 1;
                  foreach my $i ( 0 .. $imax_min ) {
@@ -3583,9 +3589,9 @@ sub prune_alignment_tree {
  
      use constant EXPLAIN_PRUNE => 0;
  
-    ####################################################################
+    #-------------------------------------------------------------------
      # Prune Tree Step 1. Start by scanning the lines and collecting info
-    ####################################################################
+    #-------------------------------------------------------------------
  
      # Note that the caller had this info but we have to redo this now because
      # alignment tokens may have been deleted.
@@ -3631,9 +3637,9 @@ sub prune_alignment_tree {
      # the patterns and levels of the next line being tested at each depth
      my ( @token_patterns_next, @levels_next, @token_indexes_next );
  
-    #########################################################
+    #-----------------------------------------------------------
      # define a recursive worker subroutine for tree construction
-    #########################################################
+    #-----------------------------------------------------------
  
      # This is a recursive routine which is called if a match condition changes
      # at any depth when a new line is encountered.  It ends the match node
@@ -3692,9 +3698,9 @@ sub prune_alignment_tree {
          return;
      };    ## end sub end_node
  
-    ######################################################
+    #-----------------------------------------------------
      # Prune Tree Step 2. Loop to form the tree of matches.
-    ######################################################
+    #-----------------------------------------------------
      foreach my $jp ( 0 .. $jmax ) {
  
          # working with two adjacent line indexes, 'm'=minus, 'p'=plus
@@ -3764,9 +3770,9 @@ sub prune_alignment_tree {
          }
      } ## end loop to form tree of matches
  
-    ##########################################################
+    #---------------------------------------------------------
      # Prune Tree Step 3. Make links from parent to child nodes
-    ##########################################################
+    #---------------------------------------------------------
  
      # It seemed cleaner to do this as a separate step rather than during tree
      # construction.  The children nodes have links up to the parent node which
@@ -3801,9 +3807,9 @@ sub prune_alignment_tree {
          }
      };
  
-    #######################################################
+    #------------------------------------------------------
      # Prune Tree Step 4. Make a list of nodes to be deleted
-    #######################################################
+    #------------------------------------------------------
  
      #  list of lines with tokens to be deleted:
      #  [$jbeg, $jend, $level_keep]
@@ -3882,9 +3888,9 @@ sub prune_alignment_tree {
          @todo_list = @todo_next;
      } ## end loop to mark nodes to delete
  
-    #############################################################
+    #------------------------------------------------------------
      # Prune Tree Step 5. Loop to delete selected alignment tokens
-    #############################################################
+    #------------------------------------------------------------
      foreach my $item (@delete_list) {
          my ( $jbeg, $jend, $level_keep ) = @{$item};
          foreach my $jj ( $jbeg .. $jend ) {
@@ -4325,12 +4331,12 @@ sub get_extra_leading_spaces {
            ? $extra_indentation_spaces_wanted
            : $avail;
  
-        #########################################################
+        #--------------------------------------------------------
          # Note: min spaces can be negative; for example with -gnu
          # f(
          #   do { 1; !!(my $x = bless []); }
          #  );
-        #########################################################
+        #--------------------------------------------------------
          # The following rule is needed to match older formatting:
          # For multiple groups, we will keep spaces non-negative.
          # For a single group, we will allow a negative space.
@@ -4626,11 +4632,11 @@ sub align_side_comments {
  
  sub valign_output_step_A {
  
-    ###############################################################
+    #------------------------------------------------------------
      # This is Step A in writing vertically aligned lines.
      # The line is prepared according to the alignments which have
      # been found. Then it is shipped to the next step.
-    ###############################################################
+    #------------------------------------------------------------
  
      my ( $self, $rinput_hash ) = @_;
  
@@ -5139,12 +5145,12 @@ sub get_output_line_number {
  
      sub valign_output_step_B {
  
-        ###############################################################
+        #---------------------------------------------------------
          # This is Step B in writing vertically aligned lines.
          # Vertical tightness is applied according to preset flags.
          # In particular this routine handles stacking of opening
          # and closing tokens.
-        ###############################################################
+        #---------------------------------------------------------
  
          my ( $self, $rinput ) = @_;
  
@@ -5367,12 +5373,12 @@ sub get_output_line_number {
  
      sub valign_output_step_C {
  
-        ###############################################################
+        #-----------------------------------------------------------------------
          # This is Step C in writing vertically aligned lines.
          # Lines are either stored in a buffer or passed along to the next step.
          # The reason for storing lines is that we may later want to reduce their
          # indentation when -sot and -sct are both used.
-        ###############################################################
+        #-----------------------------------------------------------------------
          my (
              $self,
              $seqno_string,
@@ -5402,7 +5408,8 @@ sub get_output_line_number {
              # Start storing lines when we see a line with multiple stacked
              # opening tokens.
              # patch for RT #94354, requested by Colin Williams
-            if (   $seqno_string =~ /^\d+(\:+\d+)+$/
+            if (   index( $seqno_string, ':' ) >= 0
+                && $seqno_string =~ /^\d+(\:+\d+)+$/
                  && $args_to_D[0] !~ /^[\}\)\]\:\?]/ )
              {
  
@@ -5446,11 +5453,11 @@ sub get_output_line_number {
  
  sub valign_output_step_D {
  
-    ###############################################################
+    #----------------------------------------------------------------
      # This is Step D in writing vertically aligned lines.
      # It is the end of the vertical alignment pipeline.
      # Write one vertically aligned line of code to the output object.
-    ###############################################################
+    #----------------------------------------------------------------
  
      my ( $self, $line, $leading_space_count, $level, $Kend ) = @_;
author	Steve Hancock <perltidy@users.sourceforge.net>
	Sun, 11 Sep 2022 22:02:18 +0000 (15:02 -0700)
committer	Steve Hancock <perltidy@users.sourceforge.net>
	Sun, 11 Sep 2022 22:02:18 +0000 (15:02 -0700)
CHANGES.md		patch \| blob \| history
lib/Perl/Tidy/Formatter.pm		patch \| blob \| history
lib/Perl/Tidy/Tokenizer.pm		patch \| blob \| history
lib/Perl/Tidy/VerticalAligner.pm		patch \| blob \| history