From ab1471df1204144a5327b564c2a3d3249b30f1a1 Mon Sep 17 00:00:00 2001
From: Steve Hancock <perltidy@users.sourceforge.net>
Date: Mon, 5 Oct 2020 08:52:33 -0700
Subject: [PATCH] rewrote sub excess_line_length and total_line_length

---
 lib/Perl/Tidy/Formatter.pm | 279 +++++++++++++++++++------------------
 1 file changed, 147 insertions(+), 132 deletions(-)

diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm
index 8d963b1b..a715900f 100644
--- a/lib/Perl/Tidy/Formatter.pm
+++ b/lib/Perl/Tidy/Formatter.pm
@@ -307,7 +307,6 @@ BEGIN {
         _rlines_new_              => $i++,
         _rLL_                     => $i++,
         _Klimit_                  => $i++,
-        _rnested_pairs_           => $i++,
         _K_opening_container_     => $i++,
         _K_closing_container_     => $i++,
         _K_opening_ternary_       => $i++,
@@ -371,7 +370,6 @@ BEGIN {
     # holding the batches of tokens being processed.
     $i = 0;
     use constant {
-        _comma_count_in_batch_    => $i++,
         _starting_in_quote_       => $i++,
         _ending_in_quote_         => $i++,
         _is_static_block_comment_ => $i++,
@@ -643,7 +641,6 @@ sub new {
     $self->[_saw_END_or_DATA_]                 = 0;
 
     # Hashes related to container welding...
-    $self->[_rnested_pairs_]           = [];
     $self->[_radjusted_levels_]        = [];
     $self->[_rweld_len_left_closing_]  = {};
     $self->[_rweld_len_right_closing_] = {};
@@ -4272,103 +4269,6 @@ sub dump_verbatim {
     }
 } ## end closure scan_comments
 
-sub find_nested_pairs {
-    my $self = shift;
-
-    # This routine is called once per file to do preliminary work needed for
-    # the --weld-nested option.  This information is also needed for adding
-    # semicolons.
-
-    my $rLL = $self->[_rLL_];
-    return unless ( defined($rLL) && @{$rLL} );
-
-    my $K_opening_container = $self->[_K_opening_container_];
-    my $K_closing_container = $self->[_K_closing_container_];
-
-    # We define an array of pairs of nested containers
-    my @nested_pairs;
-
-    # Names of calling routines can either be marked as 'i' or 'w',
-    # and they may invoke a sub call with an '->'. We will consider
-    # any consecutive string of such types as a single unit when making
-    # weld decisions.  We also allow a leading !
-    my $is_name_type = {
-        'i'  => 1,
-        'w'  => 1,
-        'U'  => 1,
-        '->' => 1,
-        '!'  => 1,
-    };
-
-    # Loop over all closing container tokens
-    foreach my $inner_seqno ( keys %{$K_closing_container} ) {
-        my $K_inner_closing = $K_closing_container->{$inner_seqno};
-
-        # See if it is immediately followed by another, outer closing token
-        my $K_outer_closing = $self->K_next_nonblank($K_inner_closing);
-        next unless ( defined($K_outer_closing) );
-        my $outer_seqno = $rLL->[$K_outer_closing]->[_TYPE_SEQUENCE_];
-        next unless ($outer_seqno);
-        my $token_outer_closing = $rLL->[$K_outer_closing]->[_TOKEN_];
-        next unless ( $is_closing_token{$token_outer_closing} );
-
-        # Yes .. this is a possible nesting pair.  Now we have to check the
-        # opening tokens.  The can be separated by a small amount.
-        my $K_outer_opening = $K_opening_container->{$outer_seqno};
-        my $K_inner_opening = $K_opening_container->{$inner_seqno};
-        next unless defined($K_outer_opening) && defined($K_inner_opening);
-        my $K_diff = $K_inner_opening - $K_outer_opening;
-
-        # Count nonblank characters separating them
-        if ( $K_diff < 0 ) { next }    # Shouldn't happen
-        if ( $K_diff > 8 ) { next }    # for speed
-        my $Kn             = $K_outer_opening;
-        my $nonblank_count = 0;
-        my $type;
-        my $is_name;
-        for ( my $it = 0 ; $it < 10 ; $it++ ) {
-            $Kn = $self->K_next_nonblank($Kn);
-            if ( !defined($Kn) )           { $nonblank_count = 0; last }
-            if ( $Kn eq $K_inner_opening ) { $nonblank_count++;   last; }
-            my $last_type    = $type;
-            my $last_is_name = $is_name;
-            $type    = $rLL->[$Kn]->[_TYPE_];
-            $is_name = $is_name_type->{$type};
-            $nonblank_count++
-              unless ( $is_name && $last_is_name );
-            last if ( $nonblank_count > 2 );
-        }
-
-        if (   $nonblank_count == 1
-            || $nonblank_count == 2
-            && $rLL->[$K_outer_opening]->[_TOKEN_] eq '(' )
-        {
-            push @nested_pairs,
-              [ $inner_seqno, $outer_seqno, $K_inner_closing ];
-        }
-        next;
-    }
-
-    # The weld routine expects the pairs in order in the form
-    #   [$seqno_inner, $seqno_outer]
-    # And they must be in the same order as the inner closing tokens
-    # (otherwise, welds of three or more adjacent tokens will not work).  The K
-    # value of this inner closing token has temporarily been stored for
-    # sorting.
-    @nested_pairs =
-
-      # Drop the K index after sorting (it would cause trouble downstream)
-      map { [ $_->[0], $_->[1] ] }
-
-      # Sort on the K values
-      sort { $a->[2] <=> $b->[2] } @nested_pairs;
-
-    # FIXME: this could just be returned and passed on to sub weld_...
-    $self->[_rnested_pairs_] = \@nested_pairs;
-
-    return;
-}
-
 {    ## begin closure check_line_hashes
 
     # This code checks that no autovivification occurs in the 'line' hash
@@ -4665,10 +4565,10 @@ sub respace_tokens {
 
           );
 
-       # Do not add a semicolon if it would impede a weld with an immediately
-       # following closing token.  We will use an approximate rule here:
-       # Do not add a semicolon between two closing container tokens if it would
-       # be the only semicolon in the outer container.
+        # Do not add a semicolon if it would impede a weld with an immediately
+        # following closing token...like this
+        #   { ( some code ) }
+        #                  ^--No semicolon can go here
 
         # look at the previous token... (note use of the _new array here)
         my $token_prev  = $rLL_new->[$Kp]->[_TOKEN_];
@@ -5665,9 +5565,6 @@ sub weld_containers {
 
     if ( $rOpts->{'weld-nested-containers'} ) {
 
-        # Find nested pairs of container tokens for any welding.
-        $self->find_nested_pairs();
-
         # if called, weld_nested_containers must be called before other weld
         # operations.  This is because weld_nested_containers could overwrite
         # hash values written by weld_cuddled_blocks and weld_nested_quotes.
@@ -5880,6 +5777,100 @@ sub weld_cuddled_blocks {
     return;
 }
 
+sub find_nested_pairs {
+    my $self = shift;
+
+    # This routine is called once per file to do preliminary work needed for
+    # the --weld-nested option.  This information is also needed for adding
+    # semicolons.
+
+    my $rLL = $self->[_rLL_];
+    return unless ( defined($rLL) && @{$rLL} );
+
+    my $K_opening_container = $self->[_K_opening_container_];
+    my $K_closing_container = $self->[_K_closing_container_];
+
+    # We define an array of pairs of nested containers
+    my @nested_pairs;
+
+    # Names of calling routines can either be marked as 'i' or 'w',
+    # and they may invoke a sub call with an '->'. We will consider
+    # any consecutive string of such types as a single unit when making
+    # weld decisions.  We also allow a leading !
+    my $is_name_type = {
+        'i'  => 1,
+        'w'  => 1,
+        'U'  => 1,
+        '->' => 1,
+        '!'  => 1,
+    };
+
+    # Loop over all closing container tokens
+    foreach my $inner_seqno ( keys %{$K_closing_container} ) {
+        my $K_inner_closing = $K_closing_container->{$inner_seqno};
+
+        # See if it is immediately followed by another, outer closing token
+        my $K_outer_closing = $self->K_next_nonblank($K_inner_closing);
+        next unless ( defined($K_outer_closing) );
+        my $outer_seqno = $rLL->[$K_outer_closing]->[_TYPE_SEQUENCE_];
+        next unless ($outer_seqno);
+        my $token_outer_closing = $rLL->[$K_outer_closing]->[_TOKEN_];
+        next unless ( $is_closing_token{$token_outer_closing} );
+
+        # Yes .. this is a possible nesting pair.  Now we have to check the
+        # opening tokens.  The can be separated by a small amount.
+        my $K_outer_opening = $K_opening_container->{$outer_seqno};
+        my $K_inner_opening = $K_opening_container->{$inner_seqno};
+        next unless defined($K_outer_opening) && defined($K_inner_opening);
+        my $K_diff = $K_inner_opening - $K_outer_opening;
+
+        # Count nonblank characters separating them
+        if ( $K_diff < 0 ) { next }    # Shouldn't happen
+        if ( $K_diff > 8 ) { next }    # for speed
+        my $Kn             = $K_outer_opening;
+        my $nonblank_count = 0;
+        my $type;
+        my $is_name;
+        for ( my $it = 0 ; $it < 10 ; $it++ ) {
+            $Kn = $self->K_next_nonblank($Kn);
+            if ( !defined($Kn) )           { $nonblank_count = 0; last }
+            if ( $Kn eq $K_inner_opening ) { $nonblank_count++;   last; }
+            my $last_type    = $type;
+            my $last_is_name = $is_name;
+            $type    = $rLL->[$Kn]->[_TYPE_];
+            $is_name = $is_name_type->{$type};
+            $nonblank_count++
+              unless ( $is_name && $last_is_name );
+            last if ( $nonblank_count > 2 );
+        }
+
+        if (   $nonblank_count == 1
+            || $nonblank_count == 2
+            && $rLL->[$K_outer_opening]->[_TOKEN_] eq '(' )
+        {
+            push @nested_pairs,
+              [ $inner_seqno, $outer_seqno, $K_inner_closing ];
+        }
+        next;
+    }
+
+    # The weld routine expects the pairs in order in the form
+    #   [$seqno_inner, $seqno_outer]
+    # And they must be in the same order as the inner closing tokens
+    # (otherwise, welds of three or more adjacent tokens will not work).  The K
+    # value of this inner closing token has temporarily been stored for
+    # sorting.
+    @nested_pairs =
+
+      # Drop the K index after sorting (it would cause trouble downstream)
+      map { [ $_->[0], $_->[1] ] }
+
+      # Sort on the K values
+      sort { $a->[2] <=> $b->[2] } @nested_pairs;
+
+    return \@nested_pairs;
+}
+
 sub weld_nested_containers {
     my ($self) = @_;
 
@@ -5898,11 +5889,13 @@ sub weld_nested_containers {
 
     my $rLL                 = $self->[_rLL_];
     my $Klimit              = $self->get_rLL_max_index();
-    my $rnested_pairs       = $self->[_rnested_pairs_];
     my $rlines              = $self->[_rlines_];
     my $K_opening_container = $self->[_K_opening_container_];
     my $K_closing_container = $self->[_K_closing_container_];
 
+    # Find nested pairs of container tokens for any welding.
+    my $rnested_pairs = $self->find_nested_pairs();
+
     # Return unless there are nested pairs to weld
     return unless defined($rnested_pairs) && @{$rnested_pairs};
 
@@ -6382,17 +6375,16 @@ sub weld_len_right {
 sub weld_len_right_to_go {
     my ( $self, $i ) = @_;
 
-    # FIXME: this sub should be eliminated for efficiency. Make
-    # calls directly to sub weld_len_right instead, but watch out
-    # for the initial test on a blank.
+    # Given the index of a token in the 'to_go' array return the length of any
+    # weld to its right. 
 
-    # Given the index of a token in the 'to_go' array
-    # return the length of any weld to its right
-    return if ( $i < 0 );
+    # Back up at a blank.
+    return 0 if ( $i < 0 );
     if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
-    my $weld_len =
-      $self->weld_len_right( $type_sequence_to_go[$i], $types_to_go[$i] );
-    return $weld_len;
+
+    return $type_sequence_to_go[$i]
+      ? $self->weld_len_right( $type_sequence_to_go[$i], $types_to_go[$i] )
+      : 0;
 }
 
 sub mark_short_nested_blocks {
@@ -9345,7 +9337,6 @@ EOM
 
             }
         }
-        $this_batch->[_comma_count_in_batch_] = $comma_count_in_batch;
 
         my $comma_arrow_count_contained =
           $self->match_opening_and_closing_tokens();
@@ -9527,8 +9518,8 @@ EOM
                    $is_long_line
                 || $old_line_count_in_batch > 1
 
-               # must always call scan_list() with unbalanced batches because it
-               # is maintaining some stacks
+		# must always call scan_list() with unbalanced batches because
+		# it is maintaining some stacks
                 || is_unbalanced_batch()
 
                 # call scan_list if we might want to break at commas
@@ -9546,7 +9537,7 @@ EOM
             {
                 ## This caused problems in one version of perl for unknown reasons:
                 ## $saw_good_break ||= scan_list();
-                my $sgb = $self->scan_list();
+                my $sgb = $self->scan_list($is_long_line);
                 $saw_good_break ||= $sgb;
             }
 
@@ -12894,7 +12885,7 @@ sub set_continuation_breaks {
 
     sub scan_list {
 
-        my ($self) = @_;
+        my ( $self, $is_long_line ) = @_;
 
         # This routine is responsible for setting line breaks for all lists,
         # so that hierarchical structure can be displayed and so that list
@@ -12939,7 +12930,6 @@ sub set_continuation_breaks {
 
         check_for_new_minimum_depth($current_depth);
 
-        my $is_long_line = $self->excess_line_length( 0, $max_index_to_go ) > 0;
         my $want_previous_breakpoint = -1;
 
         my $saw_good_breakpoint;
@@ -14966,24 +14956,49 @@ sub total_line_length {
 
     # return length of a line of tokens ($ibeg .. $iend)
     my ( $ibeg, $iend ) = @_;
-    return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
+
+    # original coding:
+    #return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
+
+    # this is basically sub 'leading_spaces_to_go':
+    my $indentation = $leading_spaces_to_go[$ibeg];
+    if ( ref($indentation) ) { $indentation = $indentation->get_spaces() }
+
+    return $indentation + $summed_lengths_to_go[ $iend + 1 ] -
+      $summed_lengths_to_go[$ibeg];
 }
 
+
 sub excess_line_length {
 
     # return number of characters by which a line of tokens ($ibeg..$iend)
     # exceeds the allowable line length.
+
+    # NOTE: Profiling shows that this is a critical routine for efficiency.
+    # Therefore I have eliminated additional calls to subs from it.
     my ( $self, $ibeg, $iend, $ignore_right_weld ) = @_;
 
+    # Original expression for line length
+    ##$length = leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend ); 
+
+    # This is basically sub 'leading_spaces_to_go':
+    my $indentation = $leading_spaces_to_go[$ibeg];
+    if ( ref($indentation) ) { $indentation = $indentation->get_spaces() }
+
+    my $length =
+      $indentation +
+      $summed_lengths_to_go[ $iend + 1 ] -
+      $summed_lengths_to_go[$ibeg];
+
     # Include right weld lengths unless requested not to.
-    my $wr =
-      $ignore_right_weld
-      ? 0
-      : $self->weld_len_right( $type_sequence_to_go[$iend],
-        $types_to_go[$iend] );
-
-    return total_line_length( $ibeg, $iend ) + $wr -
-      $maximum_line_length[ $levels_to_go[$ibeg] ];
+    if ( !$ignore_right_weld && $type_sequence_to_go[$iend] ) {
+        my $wr = $self->weld_len_right( $type_sequence_to_go[$iend],
+            $types_to_go[$iend] );
+        $length += $wr;
+    }
+
+    # return the excess
+    return $length - $maximum_line_length[ $levels_to_go[$ibeg] ];
 }
 
 sub get_spaces {
-- 
2.39.5