From 3fd898b7164c461bff7707b1ce6018672e8a1b5d Mon Sep 17 00:00:00 2001
From: Steve Hancock <perltidy@users.sourceforge.net>
Date: Sat, 11 Apr 2020 18:07:30 -0700
Subject: [PATCH] added var _TOKEN_LENGTH_ and consolidated length coding in
 Formatter.pm

---
 lib/Perl/Tidy/Formatter.pm       | 266 +++++++++++++++++--------------
 lib/Perl/Tidy/VerticalAligner.pm |   7 +-
 2 files changed, 153 insertions(+), 120 deletions(-)

diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm
index 53c13a11..092fed7f 100644
--- a/lib/Perl/Tidy/Formatter.pm
+++ b/lib/Perl/Tidy/Formatter.pm
@@ -248,7 +248,6 @@ use vars qw{
   $rOpts_brace_left_and_indent
   $rOpts_closing_side_comment_maximum_text
   $rOpts_continuation_indentation
-  $rOpts_ignore_side_comment_lengths
   $rOpts_indent_columns
   $rOpts_line_up_parentheses
   $rOpts_maximum_line_length
@@ -303,6 +302,7 @@ BEGIN {
         _LEVEL_TRUE_            => $i++,
         _SLEVEL_                => $i++,
         _TOKEN_                 => $i++,
+        _TOKEN_LENGTH_          => $i++,
         _TYPE_                  => $i++,
         _TYPE_SEQUENCE_         => $i++,
     };
@@ -2329,9 +2329,14 @@ sub respace_tokens {
     my $rtoken_vars;
     my $Kmax = @{$rLL} - 1;
 
+    my $CODE_type = "";
+    my $line_type = "";
+
     my $rOpts_add_whitespace            = $rOpts->{'add-whitespace'};
     my $rOpts_delete_old_whitespace     = $rOpts->{'delete-old-whitespace'};
     my $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
+    my $rOpts_ignore_side_comment_lengths =
+      $rOpts->{'ignore-side-comment-lengths'};
 
     # Set the whitespace flags, which indicate the token spacing preference.
     my $rwhitespace_flags = $self->set_whitespace_flags();
@@ -2409,22 +2414,34 @@ sub respace_tokens {
             }
         }
 
-        # find the length of this token
+        my $type = $item->[_TYPE_];
+
+        # trim comments
+        if ( $type eq '#' ) {
+            $item->[_TOKEN_] =~ s/\s*$//;
+        }
+
+        # Find the length of this token.  Later it may be adjusted if phantom
+        # or ignoring side comment lengths.
         my $token_length = length( $item->[_TOKEN_] );
 
+        # NOTE: $rOpts_ignore_side_comment_lengths is now a lexical of this sub
+        # Mark length of side comments as just 1 if their lengths are ignored
+        if (   $type eq '#'
+            && $rOpts_ignore_side_comment_lengths
+            && ( !$CODE_type || $CODE_type eq 'HSC' ) )
+        {
+            $token_length = 1;
+        }
+
+        $item->[_TOKEN_LENGTH_] = $token_length;
+
         # and update the cumulative length
         $cumulative_length += $token_length;
 
         # Save the length sum to just AFTER this token
         $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
 
-        my $type = $item->[_TYPE_];
-
-        # trim side comments
-        if ( $type eq '#' ) {
-            $item->[_TOKEN_] =~ s/\s*$//;
-        }
-
         if ( $type && $type ne 'b' && $type ne '#' ) {
             $last_nonblank_type       = $type;
             $last_nonblank_token      = $item->[_TOKEN_];
@@ -2527,10 +2544,16 @@ sub respace_tokens {
             # Convert the existing blank to:
             #   a phantom semicolon for one_line_block option = 0 or 1
             #   a real semicolon    for one_line_block option = 2
-            my $tok = $rOpts_one_line_block_semicolons == 2 ? ';' : '';
+            my $tok     = '';
+            my $len_tok = 0;
+            if ( $rOpts_one_line_block_semicolons == 2 ) {
+                $tok     = ';';
+                $len_tok = 1;
+            }
 
-            $rLL_new->[$Ktop]->[_TOKEN_] = $tok;    # zero length if phantom
-            $rLL_new->[$Ktop]->[_TYPE_]  = ';';
+            $rLL_new->[$Ktop]->[_TOKEN_]        = $tok;
+            $rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok;
+            $rLL_new->[$Ktop]->[_TYPE_]         = ';';
             $rLL_new->[$Ktop]->[_SLEVEL_] =
               $rLL->[$KK]->[_SLEVEL_];
 
@@ -2607,8 +2630,6 @@ sub respace_tokens {
 
     # Main loop over all lines of the file
     my $last_K_out;
-    my $CODE_type = "";
-    my $line_type = "";
 
     # Testing option to break qw.  Do not use; it can make a mess.
     my $ALLOW_BREAK_MULTILINE_QW = 0;
@@ -4583,26 +4604,6 @@ EOM
     return;
 }
 
-sub sum_token_lengths {
-    my $self = shift;
-
-    # This has been merged into 'respace_tokens' but retained for reference
-    my $rLL               = $self->{rLL};
-    my $cumulative_length = 0;
-    for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
-
-        # now set the length of this token
-        my $token_length = length( $rLL->[$KK]->[_TOKEN_] );
-
-        $cumulative_length += $token_length;
-
-        # Save the length sum to just AFTER this token
-        $rLL->[$KK]->[_CUMULATIVE_LENGTH_] = $cumulative_length;
-
-    }
-    return;
-}
-
 sub resync_lines_and_tokens {
 
     my $self   = shift;
@@ -6006,11 +6007,9 @@ EOM
     $rOpts_closing_side_comment_maximum_text =
       $rOpts->{'closing-side-comment-maximum-text'};
     $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
-    $rOpts_ignore_side_comment_lengths =
-      $rOpts->{'ignore-side-comment-lengths'};
-    $rOpts_indent_columns      = $rOpts->{'indent-columns'};
-    $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
-    $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
+    $rOpts_indent_columns           = $rOpts->{'indent-columns'};
+    $rOpts_line_up_parentheses      = $rOpts->{'line-up-parentheses'};
+    $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
     $rOpts_variable_maximum_line_length =
       $rOpts->{'variable-maximum-line-length'};
     $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
@@ -6961,7 +6960,7 @@ sub tight_paren_follows {
     # if ( do { $2 !~ /&/ } ) { ... }
 
     # Example: $K_ic - $K_oo = 10    [Pass rule 2]
-    #          $K_io - $K_oo = 9     [Fail rule 3]
+    #          $K_io - $K_oo = 9     [Pass rule 3]
     # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
 
     return if ( $K_io - $K_oo > 9 );
@@ -6995,29 +6994,6 @@ sub tight_paren_follows {
     return 1;
 }
 
-sub token_length {
-
-    # Returns the length of a token, given:
-    #  $token=text of the token
-    #  $type = type
-    #  $not_first_token = should be TRUE if this is not the first token of
-    #   the line.  It might the index of this token in an array.  It is
-    #   used to test for a side comment vs a block comment.
-
-    # uses Global vars:  $rOpts_ignore_side_comment_lengths
-
-    my ( $token, $type, $not_first_token ) = @_;
-    my $token_length = length($token);
-
-    # We mark lengths of side comments as just 1 if we are
-    # ignoring their lengths when setting line breaks.
-    $token_length = 1
-      if ( $rOpts_ignore_side_comment_lengths
-        && $not_first_token
-        && $type eq '#' );
-    return $token_length;
-}
-
 sub copy_hash {
 
     # NOTE: This sub is not currently used
@@ -7152,20 +7128,12 @@ EOM
         }
     }
 
-    sub rtoken_length {
-
-        # return length of ith token in @{$rtokens}
-        my ($i) = @_;
-        return token_length( $rinput_token_array->[$i]->[_TOKEN_],
-            $rinput_token_array->[$i]->[_TYPE_], $i );
-    }
-
     # Routine to place the current token into the output stream.
     # Called once per output token.
     sub store_token_to_go {
 
         my ( $self, $side_comment_follows ) = @_;
-
+        my $rLL  = $self->{rLL};
         my $flag = $side_comment_follows ? 1 : $no_internal_newlines;
 
         ++$max_index_to_go;
@@ -7199,8 +7167,26 @@ EOM
           if ( $iprev >= 0 && $type ne 'b' );
         $inext_to_go[$max_index_to_go] = $max_index_to_go + 1;
 
-        $token_lengths_to_go[$max_index_to_go] =
-          token_length( $token, $type, $max_index_to_go );
+        my $length = $rLL->[$Ktoken_vars]->[_TOKEN_LENGTH_];
+
+        # FIXME: Patch for indent-only, in which the entire set of tokens is
+        # turned into type 'q'. Lengths have not been defined because sub
+        # 'respace_tokens' is bypassed. We do not need lengths in this case,
+        # but we will use the character count to have a defined value.  In the
+        # future, it would be nicer to have 'respace_tokens' convert the lines
+        # to quotes and get correct lengths.
+        if ( !defined($length) ) {
+            $length = length($token);

+            # FIXME: _TOKEN_LENGTH_ Debug code (disabled) to be removed eventually
+            if ( 0 && ( $type ne 'q' || $max_index_to_go != 0 ) ) {

+                Fault(
+"Undefined length for type=$type, tok=$token, index=$max_index_to_go\n"
+                );
+            }
+        }
+        $token_lengths_to_go[$max_index_to_go] = $length;
 
         # We keep a running sum of token lengths from the start of this batch:
         #   summed_lengths_to_go[$i]   = total length to just before token $i
@@ -7373,9 +7359,6 @@ EOM
                 $last_line_leading_type = 'b';
             }
 
-            # TRIM COMMENTS -- This could be turned off as a option
-            $rinput_token_array->[0]->[_TOKEN_] =~ s/\s*$//;    # trim right end
-
             if (
                 $rOpts->{'indent-block-comments'}
                 && (  !$rOpts->{'indent-spaced-block-comments'}
@@ -8447,7 +8430,7 @@ sub starting_one_line_block {
 
         # old whitespace could be arbitrarily large, so don't use it
         if ( $rtoken_array->[$i]->[_TYPE_] eq 'b' ) { $pos += 1 }
-        else { $pos += rtoken_length($i) }
+        else { $pos += $rtoken_array->[$i]->[_TOKEN_LENGTH_] }
 
         # ignore some small blocks
         my $type_sequence = $rtoken_array->[$i]->[_TYPE_SEQUENCE_];
@@ -8514,7 +8497,7 @@ sub starting_one_line_block {
                 && !$is_sort_map_grep{$block_type} )
             {
 
-                $pos += rtoken_length($i_nonblank);
+                $pos += $rtoken_array->[$i_nonblank]->[_TOKEN_LENGTH_];
 
                 if ( $i_nonblank > $i + 1 ) {
 
@@ -8523,7 +8506,7 @@ sub starting_one_line_block {
                     if ( $rtoken_array->[ $i + 1 ]->[_TYPE_] eq 'b' ) {
                         $pos += 1;
                     }
-                    else { $pos += rtoken_length( $i + 1 ) }
+                    else { $pos += $rtoken_array->[ $i + 1 ]->[_TOKEN_LENGTH_] }
                 }
 
                 if ( $pos >= maximum_line_length($i_start) ) {
@@ -8732,12 +8715,18 @@ sub pad_token {
 
     # insert $pad_spaces before token number $ipad
     my ( $self, $ipad, $pad_spaces ) = @_;
-    my $rLL = $self->{rLL};
+    my $rLL     = $self->{rLL};
+    my $KK      = $K_to_go[$ipad];
+    my $tok     = $rLL->[$KK]->[_TOKEN_];
+    my $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_];
+
     if ( $pad_spaces > 0 ) {
-        $tokens_to_go[$ipad] = ' ' x $pad_spaces . $tokens_to_go[$ipad];
+        $tok = ' ' x $pad_spaces . $tok;
+        $tok_len += $pad_spaces;
     }
     elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) {
-        $tokens_to_go[$ipad] = "";
+        $tok     = "";
+        $tok_len = 0;
     }
     else {
 
@@ -8745,10 +8734,12 @@ sub pad_token {
         return;
     }
 
-    # Keep token arrays in sync
-    $self->sync_token_K($ipad);
+    $tok     = $rLL->[$KK]->[_TOKEN_]        = $tok;
+    $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
 
     $token_lengths_to_go[$ipad] += $pad_spaces;
+    $tokens_to_go[$ipad] = $tok;
+
     foreach my $i ( $ipad .. $max_index_to_go ) {
         $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
     }
@@ -9868,6 +9859,7 @@ sub make_else_csc_text {
 sub add_closing_side_comment {
 
     my $self = shift;
+    my $rLL  = $self->{rLL};
 
     # add closing side comments after closing block braces if -csc used
     my ( $closing_side_comment, $cscw_block_comment );
@@ -10038,7 +10030,10 @@ sub add_closing_side_comment {
             # switch to the new csc (unless we deleted it!)
             if ($token) {
                 $tokens_to_go[$max_index_to_go] = $token;
-                $self->sync_token_K($max_index_to_go);
+                my $K = $K_to_go[$max_index_to_go];
+                $rLL->[$K]->[_TOKEN_] = $token;
+                $rLL->[$K]->[_TOKEN_LENGTH_] =
+                  length($token);    # NOTE: length no longer important
             }
         }
 
@@ -10198,7 +10193,7 @@ sub send_lines_to_vertical_aligner {
         $self->delete_needless_alignments( $ibeg, $iend,
             $ralignment_type_to_go );
 
-        my ( $rtokens, $rfields, $rpatterns ) =
+        my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
           $self->make_alignment_patterns( $ibeg, $iend,
             $ralignment_type_to_go );
 
@@ -10283,9 +10278,28 @@ sub send_lines_to_vertical_aligner {
             }
         }
 
+        # FIXME: _TOKEN_LENGTH_ Debug code to be removed eventually
+        if (0) {
+            my $nfld = @{$rfields};
+            for ( my $k = 0 ; $k < $nfld ; $k++ ) {
+                my $field = $rfields->[$k];
+                my $len   = $rfield_lengths->[$k];
+                my $len2  = length($field);
+                if ( $len != $len2 ) {
+                    if ( $field !~ /^#/ ) {
+                        Warn("len=$len != $len2 for field: '$field'");
+                    }
+                }
+            }
+        }
+
         # add any new closing side comment to the last line
         if ( $closing_side_comment && $n == $n_last_line && @{$rfields} ) {
             $rfields->[-1] .= " $closing_side_comment";
+
+            # NOTE: Patch for csc. Add 1 for the space plus a nominal 1 for the
+            # csc; its true length should not be a limiting factor from here on.
+            $rfield_lengths->[-1] += 2;
         }
 
         # send this new line down the pipe
@@ -10300,11 +10314,14 @@ sub send_lines_to_vertical_aligner {
         $rvalign_hash->{do_not_pad}                = $do_not_pad;
         $rvalign_hash->{rvertical_tightness_flags} = $rvertical_tightness_flags;
         $rvalign_hash->{level_jump}                = $level_jump;
+        $rvalign_hash->{rfields}                   = $rfields;
+        $rvalign_hash->{rpatterns}                 = $rpatterns;
+        $rvalign_hash->{rtokens}                   = $rtokens;
+        $rvalign_hash->{rfield_lengths}            = $rfield_lengths;
 
         $rvalign_hash->{valign_batch_number} = $valign_batch_number;
 
-        Perl::Tidy::VerticalAligner::valign_input( $rvalign_hash, $rfields,
-            $rtokens, $rpatterns );
+        Perl::Tidy::VerticalAligner::valign_input($rvalign_hash);
 
         $in_comma_list = $type_end eq ',' && $forced_breakpoint;
 
@@ -10493,6 +10510,15 @@ sub send_lines_to_vertical_aligner {
         return;
     }
 
+    # Closure: total of the lengths stored in @token_lengths_to_go for
+    # token indexes $ifirst .. $ilast (inclusive); 0 for an empty range.
+    my $field_length_sum = sub {
+        my ( $ifirst, $ilast ) = @_;
+        my $total = 0;
+        $total += $token_lengths_to_go[$_] for ( $ifirst .. $ilast );
+        return $total;
+    };
+
     sub make_alignment_patterns {
 
         # Here we do some important preliminary work for the
@@ -10515,10 +10541,11 @@ sub send_lines_to_vertical_aligner {
         #   field.  These should normally each match before alignment is
         #   allowed, even when the alignment tokens match.
         my ( $self, $ibeg, $iend, $ralignment_type_to_go ) = @_;
-        my @tokens   = ();
-        my @fields   = ();
-        my @patterns = ();
-        my $i_start  = $ibeg;
+        my @tokens        = ();
+        my @fields        = ();
+        my @patterns      = ();
+        my @field_lengths = ();
+        my $i_start       = $ibeg;
 
         my $depth                 = 0;
         my @container_name        = ("");
@@ -10708,6 +10735,8 @@ sub send_lines_to_vertical_aligner {
                 push( @fields,
                     join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
 
+                push @field_lengths, $field_length_sum->( $i_start, $i - 1 );
+
                 # store the alignment token for this field
                 push( @tokens, $tok );
 
@@ -10778,7 +10807,8 @@ sub send_lines_to_vertical_aligner {
 
         # done with this line .. join text of tokens to make the last field
         push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
-        return ( \@tokens, \@fields, \@patterns );
+        push @field_lengths, $field_length_sum->( $i_start, $iend );
+        return ( \@tokens, \@fields, \@patterns, \@field_lengths );
     }
 
 }    # end make_alignment_patterns
@@ -11603,7 +11633,7 @@ sub mate_index_to_go {
     }
     my $i_mate_alt = $mate_index_to_go[$i];
 
-    # Debug code to eventually be removed
+    # FIXME: Debug code to be removed eventually
     if ( 0 && $i_mate_alt != $i_mate ) {
         my $tok       = $tokens_to_go[$i];
         my $type      = $types_to_go[$i];
@@ -12764,8 +12794,9 @@ sub terminal_type_K {
         @bias{@bias_tokens} = (0) x scalar(@bias_tokens);
         my $code_bias = -.01;    # bias for closing block braces
 
-        my $type  = 'b';
-        my $token = ' ';
+        my $type         = 'b';
+        my $token        = ' ';
+        my $token_length = 1;
         my $last_type;
         my $last_nonblank_type  = $type;
         my $last_nonblank_token = $token;
@@ -12791,6 +12822,7 @@ sub terminal_type_K {
             }
 
             $token               = $tokens_to_go[$i];
+            $token_length        = $token_lengths_to_go[$i];
             $block_type          = $block_type_to_go[$i];
             $i_next              = $i + 1;
             $next_type           = $types_to_go[$i_next];
@@ -13133,8 +13165,7 @@ sub terminal_type_K {
                     if ( $right_key eq '.' ) {
                         unless (
                             $last_nonblank_type eq '.'
-                            && (
-                                length($token) <=
+                            && ( $token_length <=
                                 $rOpts_short_concatenation_item_length )
                             && ( !$is_closing_token{$token} )
                           )
@@ -15624,21 +15655,6 @@ sub undo_forced_breakpoint_stack {
     return;
 }
 
-sub sync_token_K {
-    my ( $self, $i ) = @_;
-
-    # Keep tokens in the rLL array in sync with the _to_go array
-    my $rLL = $self->{rLL};
-    my $K   = $K_to_go[$i];
-    if ( defined($K) ) {
-        $rLL->[$K]->[_TOKEN_] = $tokens_to_go[$i];
-    }
-    else {
-        # shouldn't happen
-    }
-    return;
-}
-
 {    # begin recombine_breakpoints
 
     my %is_amp_amp;
@@ -15742,7 +15758,10 @@ sub sync_token_K {
             next if ($semicolon_count);
 
             # ...ok, then make the semicolon invisible
-            $tokens_to_go[$i_semicolon] = "";
+            $tokens_to_go[$i_semicolon]            = "";
+            $token_lengths_to_go[$i_semicolon]     = 0;
+            $rLL->[$K_semicolon]->[_TOKEN_]        = "";
+            $rLL->[$K_semicolon]->[_TOKEN_LENGTH_] = 0;
         }
         return;
     }
@@ -15754,6 +15773,7 @@ sub sync_token_K {
         # Walk down the lines of this batch and unmask any invisible line-ending
         # semicolons.  They were placed by sub respace_tokens but we only now
         # know if we actually need them.
+        my $rLL = $self->{rLL};
 
         my $nmax = @{$ri_end} - 1;
         foreach my $n ( 0 .. $nmax ) {
@@ -15761,10 +15781,18 @@ sub sync_token_K {
             my $i = $ri_end->[$n];
             if ( $types_to_go[$i] eq ';' && $tokens_to_go[$i] eq '' ) {
 
-                $tokens_to_go[$i] = $want_left_space{';'} == WS_NO ? ';' : ' ;';
-                $self->sync_token_K($i);
-
-                my $line_number = 1 + $self->get_old_line_index( $K_to_go[$i] );
+                my $tok     = ';';
+                my $tok_len = 1;
+                if ( $want_left_space{';'} != WS_NO ) {
+                    $tok     = ' ;';
+                    $tok_len = 2;
+                }
+                $tokens_to_go[$i]        = $tok;
+                $token_lengths_to_go[$i] = $tok_len;
+                my $KK = $K_to_go[$i];
+                $rLL->[$KK]->[_TOKEN_]        = $tok;
+                $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
+                my $line_number = 1 + $self->get_old_line_index($KK);
                 note_added_semicolon($line_number);
             }
         }
diff --git a/lib/Perl/Tidy/VerticalAligner.pm b/lib/Perl/Tidy/VerticalAligner.pm
index f53848af..a2c0ffdb 100644
--- a/lib/Perl/Tidy/VerticalAligner.pm
+++ b/lib/Perl/Tidy/VerticalAligner.pm
@@ -372,7 +372,8 @@ sub valign_input {
     # side comments.  Tabs in these fields can mess up the column counting.
     # The log file warns the user if there are any such tabs.
 
-    my ( $rline_hash, $rfields, $rtokens, $rpatterns ) = @_;
+    my ( $rline_hash ) = @_;
+
     my $level                     = $rline_hash->{level};
     my $level_end                 = $rline_hash->{level_end};
     my $indentation               = $rline_hash->{indentation};
@@ -383,6 +384,10 @@ sub valign_input {
     my $do_not_pad                = $rline_hash->{do_not_pad};
     my $rvertical_tightness_flags = $rline_hash->{rvertical_tightness_flags};
     my $level_jump                = $rline_hash->{level_jump};
+    my $rfields                   = $rline_hash->{rfields};
+    my $rtokens                   = $rline_hash->{rtokens};
+    my $rpatterns                 = $rline_hash->{rpatterns};
+    my $rfield_lengths            = $rline_hash->{rfield_lengths};
 
     # number of fields is $jmax
     # number of tokens between fields is $jmax-1
-- 
2.39.5