From 74355058945d5f49d4b01ab215a206188004c80d Mon Sep 17 00:00:00 2001 From: Steve Hancock Date: Wed, 1 Sep 2021 07:34:07 -0700 Subject: [PATCH] eliminate recursion in sub set_forced_breakpoint --- lib/Perl/Tidy/Formatter.pm | 332 +++++++++++++++++++++++-------------- 1 file changed, 212 insertions(+), 120 deletions(-) diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm index 5fdc08aa..30ac1e6e 100644 --- a/lib/Perl/Tidy/Formatter.pm +++ b/lib/Perl/Tidy/Formatter.pm @@ -1,4 +1,4 @@ -#################################################################### +##################################################################### # # The Perl::Tidy::Formatter package adds indentation, whitespace, and # line breaks to the token stream @@ -846,6 +846,28 @@ sub new { # CODE SECTION 2: Some Basic Utilities ###################################### +sub check_rLL { + + # Verify that the rLL array has not been auto-vivified + my ( $self, $msg ) = @_; + my $rLL = $self->[_rLL_]; + my $Klimit = $self->[_Klimit_]; + my $num = @{$rLL}; + if ( $num > 0 && !defined($Klimit) || $Klimit != $num - 1 ) { + + # This fault can occur if the array has been accessed for an index + # greater than $Klimit, which is the last token index. Just accessing + # the array above index $Klimit, not setting a value, can cause @rLL to + # increase beyond $Klimit. If this occurs, the problem can be located + # by making calls to this routine at different locations in + # sub 'finish_formatting'. + $Klimit = '' if ( !defined($Klimit) ); + $msg = "" unless $msg; + Fault("$msg ERROR: rLL has num=$num but Klimit='$Klimit'\n"); + } + return; +} + sub check_keys { my ( $rtest, $rvalid, $msg, $exact_match ) = @_; @@ -4983,6 +5005,11 @@ EOM $self->adjust_indentation_levels(); + # Verify that the main token array looks OK. If this ever causes a fault + # then place similar checks before the sub calls above to localize the + # problem. + $self->check_rLL("Before 'process_all_lines'") if (DEVEL_MODE); + # Finishes formatting and write the result to the line sink. # Eventually this call should just change the 'rlines' data according to the # new line breaks and then return so that we can do an internal iteration @@ -5370,8 +5397,11 @@ sub dump_verbatim { my %wU; my %wiq; +my %is_wit; +my %is_sigil; my %is_nonlist_keyword; my %is_nonlist_type; +my %is_special_check_type; BEGIN { @@ -5382,6 +5412,12 @@ BEGIN { @q = qw(w i q Q G C Z); @{wiq}{@q} = (1) x scalar(@q); + @q = qw(w i t); + @{is_wit}{@q} = (1) x scalar(@q); + + @q = qw($ & % * @); + @{is_sigil}{@q} = (1) x scalar(@q); + # Parens following these keywords will not be marked as lists. Note that # 'for' is not included and is handled separately, by including 'f' in the # hash %is_counted_type, since it may or may not be a c-style for loop. @@ -6150,54 +6186,58 @@ sub respace_tokens { if ($type_sequence) { - if ( $is_closing_token{$token} ) { - - # Insert a tentative missing semicolon if the next token is - # a closing block brace - if ( - $type eq '}' - && $token eq '}' + # Insert a tentative missing semicolon if the next token is + # a closing block brace + if ( + $type eq '}' + && $token eq '}' - # not preceded by a ';' - && $last_nonblank_code_type ne ';' + # not preceded by a ';' + && $last_nonblank_code_type ne ';' - # and this is not a VERSION stmt (is all one line, we - # are not inserting semicolons on one-line blocks) - && $CODE_type ne 'VER' + # and this is not a VERSION stmt (is all one line, we + # are not inserting semicolons on one-line blocks) + && $CODE_type ne 'VER' - # and we are allowed to add semicolons - && $rOpts->{'add-semicolons'} - ) - { - $add_phantom_semicolon->($KK); - } + # and we are allowed to add semicolons + && $rOpts->{'add-semicolons'} + ) + { + $add_phantom_semicolon->($KK); } } # Modify certain tokens here for whitespace # The following is not yet done, but could be: # sub (x x x) - elsif ( $type =~ /^[wit]$/ ) { - - # Examples: <> - # change '$ var' to '$var' etc - # change '@ ' to '@' - my ( $sigil, $word ) = split /\s+/, $token, 2; - if ( length($sigil) == 1 - && $sigil =~ /^[\$\&\%\*\@]$/ ) - { - $token = $sigil; - $token .= $word if ($word); - $rtoken_vars->[_TOKEN_] = $token; + # ( $type =~ /^[wit]$/ ) + elsif ( $is_wit{$type} ) { + + my $leading_char = substr( $token, 0, 1 ); + + # $sigil =~ /^[\$\&\%\*\@]$/ ) + if ( $is_sigil{$leading_char} ) { + + # change '$ var' to '$var' etc + # change '@ ' to '@' + # Examples: <> + my ( $sigil, $word ) = split /\s+/, $token, 2; + if ( length($sigil) == 1 ) { + { + $token = $sigil; + $token .= $word if ($word); + $rtoken_vars->[_TOKEN_] = $token; + } + } } - # Split identifiers with leading arrows, inserting blanks if - # necessary. It is easier and safer here than in the - # tokenizer. For example '->new' becomes two tokens, '->' and - # 'new' with a possible blank between. + # Split identifiers with leading arrows, inserting blanks + # if necessary. It is easier and safer here than in the + # tokenizer. For example '->new' becomes two tokens, '->' + # and 'new' with a possible blank between. # # Note: there is a related patch in sub set_whitespace_flags - if ( substr( $token, 0, 1 ) eq '-' + elsif ($leading_char eq '-' && $token =~ /^\-\>(.*)$/ && $1 ) { @@ -6899,17 +6939,16 @@ sub resync_lines_and_tokens { # since they have probably changed due to inserting and deleting blanks # and a few other tokens. - my $Kmax = -1; - # This is the next token and its line index: my $Knext = 0; - if ( defined($rLL) && @{$rLL} ) { - $Kmax = @{$rLL} - 1; - } + my $Kmax = defined($Klimit) ? $Klimit : -1; - if ( DEVEL_MODE && $Kmax ) { + # Verify that old line indexes are in still order. If this error occurs, + # check locations where sub 'respace_tokens' creates new tokens (like + # blank spaces). It must have set a bad old line index. + if ( DEVEL_MODE && defined($Klimit) ) { my $iline = $rLL->[0]->[_LINE_INDEX_]; - for ( my $KK = 1 ; $KK <= $Kmax ; $KK++ ) { + for ( my $KK = 1 ; $KK <= $Klimit ; $KK++ ) { my $iline_last = $iline; $iline = $rLL->[$KK]->[_LINE_INDEX_]; if ( $iline < $iline_last ) { @@ -7061,7 +7100,6 @@ EOM $ris_essential_old_breakpoint->{$Klast_prev} = 1; } } - return; } @@ -8890,9 +8928,13 @@ sub adjust_indentation_levels { # levels. It would be much nicer to have the weld routines also use this # adjustment, but that gets complicated when we combine -gnu -wn and have # some welded quotes. - my $radjusted_levels = $self->[_radjusted_levels_]; + my $Klimit = $self->[_Klimit_]; my $rLL = $self->[_rLL_]; - foreach my $KK ( 0 .. @{$rLL} - 1 ) { + my $radjusted_levels = $self->[_radjusted_levels_]; + + return unless ( defined($Klimit) ); + + foreach my $KK ( 0 .. $Klimit ) { $radjusted_levels->[$KK] = $rLL->[$KK]->[_LEVEL_]; } @@ -11255,39 +11297,39 @@ EOM } # This is a good place to kill incomplete one-line blocks - if ( - ( - ( $semicolons_before_block_self_destruct == 0 ) - && ( $max_index_to_go >= 0 ) - && ( $last_old_nonblank_type eq ';' ) - && ( $first_new_nonblank_token ne '}' ) - ) - - # Patch for RT #98902. Honor request to break at old commas. - || ( $rOpts_break_at_old_comma_breakpoints - && $max_index_to_go >= 0 - && $last_old_nonblank_type eq ',' ) - ) - { - $forced_breakpoint_to_go[$max_index_to_go] = 1 - if ($rOpts_break_at_old_comma_breakpoints); - destroy_one_line_block(); - $self->end_batch(); - } + if ( $max_index_to_go >= 0 ) { + if ( + ( + ( $semicolons_before_block_self_destruct == 0 ) + && ( $last_old_nonblank_type eq ';' ) + && ( $first_new_nonblank_token ne '}' ) + ) - # Keep any requested breaks before this line. Note that we have to - # use the original K_first because it may have been reduced above - # to add a blank. The value of the flag is as follows: - # 1 => hard break, flush the batch - # 2 => soft break, set breakpoint and continue building the batch - if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} ) { - destroy_one_line_block(); - if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} == 2 ) { - $self->set_forced_breakpoint($max_index_to_go); - } - else { + # Patch for RT #98902. Honor request to break at old commas. + || ( $rOpts_break_at_old_comma_breakpoints + && $last_old_nonblank_type eq ',' ) + ) + { + $forced_breakpoint_to_go[$max_index_to_go] = 1 + if ($rOpts_break_at_old_comma_breakpoints); + destroy_one_line_block(); $self->end_batch(); } + + # Keep any requested breaks before this line. Note that we have to + # use the original K_first because it may have been reduced above + # to add a blank. The value of the flag is as follows: + # 1 => hard break, flush the batch + # 2 => soft break, set breakpoint and continue building the batch + if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} ) { + destroy_one_line_block(); + if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} == 2 ) { + $self->set_forced_breakpoint($max_index_to_go); + } + else { + $self->end_batch(); + } + } } # loop to process the tokens one-by-one @@ -12380,13 +12422,77 @@ sub compare_indentation_levels { # - If a break is made after an opening token, then a break will # also be made before the corresponding closing token. - return unless defined $i && $i >= 0; + if ( !defined($i) || $i < 0 ) { + + # Calls with bad index $i are harmless but waste time and should + # be caught and eliminated during code development. + if (DEVEL_MODE) { + my ( $a, $b, $c ) = caller(); + Fault( +"Bad call to forced breakpoint from $a $b $c ; called with i=$i\n" + ); + } + return; + } + + # Break after token $i + my ($i_nonblank) = $self->set_forced_breakpoint_AFTER($i); + + # If we break at an opening container..break at the closing + my $set_closing; + if ( defined($i_nonblank) + && $is_opening_sequence_token{ $tokens_to_go[$i_nonblank] } ) + { + $set_closing = 1; + $self->set_closing_breakpoint($i_nonblank); + } + + DEBUG_FORCE && do { + my ( $a, $b, $c ) = caller(); + my $msg = +"FORCE $forced_breakpoint_count after call from $a $c with i=$i max=$max_index_to_go"; + if ( !defined($i_nonblank) ) { + $i = "" unless defined($i); + $msg .= " but could not set break after i='$i'\n"; + } + else { + $msg .= <= 0 ); # Back up at a blank so we have a token to examine. # This was added to fix for cases like b932 involving an '=' break. if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- } - # no breaks between welded tokens + # Never break between welded tokens return if ( $total_weld_count && $self->[_rK_weld_right_]->{ $K_to_go[$i] } ); @@ -12408,21 +12514,6 @@ sub compare_indentation_levels { if ( $i >= 0 && $i <= $max_index_to_go ) { my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1; - DEBUG_FORCE && do { - my ( $a, $b, $c ) = caller(); - print STDOUT -"FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n"; - }; - - ###################################################################### - # NOTE: if we call set_closing_breakpoint below it will then call - # this routing back. So there is the possibility of an infinite - # loop if a programming error is made. As a precaution, I have - # added a check on the forced_breakpoint flag, so that we won't - # keep trying to set it. That will give additional protection - # against a loop. - ###################################################################### - if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 && !$forced_breakpoint_to_go[$i_nonblank] ) @@ -12436,11 +12527,8 @@ sub compare_indentation_levels { $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] = $i_nonblank; - # if we break at an opening container..break at the closing - if ( $is_opening_sequence_token{ $tokens_to_go[$i_nonblank] } ) - { - $self->set_closing_breakpoint($i_nonblank); - } + # success + return $i_nonblank; } } return; @@ -12524,17 +12612,14 @@ sub compare_indentation_levels { if ( $mate_index_to_go[$i_break] >= 0 ) { - # CAUTION: infinite recursion possible here: - # set_closing_breakpoint calls set_forced_breakpoint, and - # set_forced_breakpoint call set_closing_breakpoint - # ( test files attrib.t, BasicLyx.pm.html). - # Don't reduce the '2' in the statement below + # Don't reduce the '2' in the statement below. + # Test files: attrib.t, BasicLyx.pm.html if ( $mate_index_to_go[$i_break] > $i_break + 2 ) { # break before } ] and ), but sub set_forced_breakpoint will decide # to break before or after a ? and : my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1; - $self->set_forced_breakpoint( + $self->set_forced_breakpoint_AFTER( $mate_index_to_go[$i_break] - $inc ); } } @@ -13130,7 +13215,7 @@ EOM } elsif ( $is_closing_sequence_token{$token} ) { - if ( $rwant_container_open->{$seqno} ) { + if ( $i > 0 && $rwant_container_open->{$seqno} ) { $self->set_forced_breakpoint( $i - 1 ); } @@ -16680,7 +16765,9 @@ sub set_continuation_breaks { } ## end if ( $type eq ':' ) if ( has_postponed_breakpoint($type_sequence) ) { my $inc = ( $type eq ':' ) ? 0 : 1; - $self->set_forced_breakpoint( $i - $inc ); + if ( $i >= $inc ) { + $self->set_forced_breakpoint( $i - $inc ); + } } } ## end if ( $is_closing_sequence_token{$token} ) @@ -17149,8 +17236,9 @@ sub set_continuation_breaks { { $ibr--; } - - $self->set_forced_breakpoint($ibr); + if ( $ibr >= 0 ) { + $self->set_forced_breakpoint($ibr); + } } } ## end if ( defined($i_start_2...)) @@ -17161,9 +17249,11 @@ sub set_continuation_breaks { # note: break before closing structure will be automatic if ( $minimum_depth <= $current_depth ) { - $self->set_forced_breakpoint($i_opening) - unless ( $do_not_break_apart - || is_unbreakable_container($current_depth) ); + if ( $i_opening >= 0 ) { + $self->set_forced_breakpoint($i_opening) + unless ( $do_not_break_apart + || is_unbreakable_container($current_depth) ); + } # break at ',' of lower depth level before opening token if ( $last_comma_index[$depth] ) { @@ -17406,16 +17496,18 @@ sub set_continuation_breaks { # break open container... my $i_opening = $opening_structure_index_stack[$dd]; - $self->set_forced_breakpoint($i_opening) - unless ( - is_unbreakable_container($dd) + if ( defined($i_opening) && $i_opening >= 0 ) { + $self->set_forced_breakpoint($i_opening) + unless ( + is_unbreakable_container($dd) - # Avoid a break which would place an isolated ' or " - # on a line - || ( $type eq 'Q' - && $i_opening >= $max_index_to_go - 2 - && ( $token eq "'" || $token eq '"' ) ) - ); + # Avoid a break which would place an isolated ' or " + # on a line + || ( $type eq 'Q' + && $i_opening >= $max_index_to_go - 2 + && ( $token eq "'" || $token eq '"' ) ) + ); + } } ## end for ( my $dd = $current_depth...) # Return a flag indicating if the input file had some good breakpoints. -- 2.39.5