From 7d23bf444503eaaf8a758744dccc86548c661a19 Mon Sep 17 00:00:00 2001 From: Steve Hancock Date: Tue, 13 Apr 2021 20:51:42 -0700 Subject: [PATCH] Correct brace types mismarked by tokenizer --- lib/Perl/Tidy/Formatter.pm | 79 ++++++++++++++++++++++++++++---------- lib/Perl/Tidy/Tokenizer.pm | 12 +++++- local-docs/BugLog.pod | 13 ++++++- 3 files changed, 81 insertions(+), 23 deletions(-) diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm index a273e9f8..6cc468c6 100644 --- a/lib/Perl/Tidy/Formatter.pm +++ b/lib/Perl/Tidy/Formatter.pm @@ -5957,34 +5957,59 @@ sub respace_tokens { my $line_diff = $lx_close - $lx_open; $ris_broken_container->{$seqno} = $line_diff; - # Handle code blocks + # See if this is a list + my $is_list; + my $rtype_count = $rtype_count_by_seqno->{$seqno}; + if ($rtype_count) { + my $comma_count = $rtype_count->{','}; + my $fat_comma_count = $rtype_count->{'=>'}; + my $semicolon_count = $rtype_count->{';'}; + + # We will define a list to be a container with one or more commas + # and no semicolons. + $is_list = + ( $comma_count || $fat_comma_count ) && !$semicolon_count; + + } + + # Look for a block brace marked as uncertain. If the tokenizer thinks + # its guess is uncertain for the type of a brace following an unknown + # bareword then it adds a trailing space as a signal. We can fix the + # type here now that we have had a better look at the contents of the + # container. This fixes case b1085. To find the corresponding code in + # Tokenizer.pm search for 'b1085' with an editor. 
my $block_type = $rLL_new->[$K_opening]->[_BLOCK_TYPE_]; - if ($block_type) { + if ( $block_type && substr( $block_type, -1, 1 ) eq ' ' ) { - # The -lp option needs to know if a container holds a code block - next unless ($rOpts_line_up_parentheses); + # Always remove the trailing space + $block_type =~ s/\s+$//; - my $seqno_parent = $rparent_of_seqno->{$seqno}; - while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) { - $rhas_code_block->{$seqno_parent} = 1; - $rhas_broken_code_block->{$seqno_parent} = $line_diff; - $seqno_parent = $rparent_of_seqno->{$seqno_parent}; + # Try to filter out parenless sub calls + my ( $Knn1, $Knn2 ); + my ( $type_nn1, $type_nn2 ) = ( 'b', 'b' ); + $Knn1 = $self->K_next_nonblank( $K_opening, $rLL_new ); + $Knn2 = $self->K_next_nonblank( $Knn1, $rLL_new ) if defined($Knn1); + $type_nn1 = $rLL_new->[$Knn1]->[_TYPE_] if ( defined($Knn1) ); + $type_nn2 = $rLL_new->[$Knn2]->[_TYPE_] if ( defined($Knn2) ); + if ( $type_nn1 eq 'w' && $type_nn2 =~ /^[wiqQGCZ]$/ ) { + $is_list = 0; } - next; + # Convert to a hash brace if it looks like it holds a list + if ($is_list) { + + $block_type = ""; + + $rLL_new->[$K_opening]->[_CI_LEVEL_] = 1; + $rLL_new->[$K_closing]->[_CI_LEVEL_] = 1; + } + + $rLL_new->[$K_opening]->[_BLOCK_TYPE_] = $block_type; + $rLL_new->[$K_closing]->[_BLOCK_TYPE_] = $block_type; } - # Handle lists - my $rtype_count = $rtype_count_by_seqno->{$seqno}; - next unless ($rtype_count); - my $comma_count = $rtype_count->{','}; - my $fat_comma_count = $rtype_count->{'=>'}; - my $semicolon_count = $rtype_count->{';'}; - - # We will define a list to be a container with one or more commas and - # no semicolons. 
- my $is_list = ( $comma_count || $fat_comma_count ) && !$semicolon_count; - if ($is_list) { + # Handle a list container + if ( $is_list && !$block_type ) { $ris_list_by_seqno->{$seqno} = $seqno; my $seqno_parent = $rparent_of_seqno->{$seqno}; my $depth = 0; @@ -6021,6 +6046,17 @@ sub respace_tokens { $seqno_parent = $rparent_of_seqno->{$seqno_parent}; } } + + # Handle code blocks ... + # The -lp option needs to know if a container holds a code block + elsif ( $block_type && $rOpts_line_up_parentheses ) { + my $seqno_parent = $rparent_of_seqno->{$seqno}; + while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) { + $rhas_code_block->{$seqno_parent} = 1; + $rhas_broken_code_block->{$seqno_parent} = $line_diff; + $seqno_parent = $rparent_of_seqno->{$seqno_parent}; + } + } } # Find containers with ternaries, needed for -lp formatting. @@ -22063,3 +22099,4 @@ sub wrapup { } ## end package Perl::Tidy::Formatter 1; + diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm index d7856d9f..b1fe0741 100644 --- a/lib/Perl/Tidy/Tokenizer.pm +++ b/lib/Perl/Tidy/Tokenizer.pm @@ -5352,6 +5352,16 @@ sub decide_if_code_block { $code_block_type = ""; } } + + if ( $code_block_type && $pre_types[$j] eq '}' ) { + + # Patch for case b1085: if we hit the sentinel token then it is + # uncertain if this is a block. If this brace follows a bareword, + # then append a space as a signal to the formatter that this may + # not be a block brace. To find the corresponding code in + # Formatter.pm search for 'b1085'. + $code_block_type .= " " if ( $code_block_type =~ /^\w/ ); + } } return $code_block_type; @@ -7698,7 +7708,7 @@ sub scan_number_do { |([0-7_]+ # string of octal digits (\.([0-7][0-7_]*)?)? 
# optional decimal and fraction [Pp][+-]?[0-7] # REQUIRED exponent, no underscore - [0-7_]*) # Additional exponent digits, with underscores + [0-7_]*) # Additional exponent digits with underscores # or octal integer |([0-7_]+) # string of octal digits diff --git a/local-docs/BugLog.pod b/local-docs/BugLog.pod index c9b3405e..846cac82 100644 --- a/local-docs/BugLog.pod +++ b/local-docs/BugLog.pod @@ -3,6 +3,17 @@ =over 4 +=item B<Correct brace types mismarked by tokenizer> + +Testing with random parameters produced a case in which a brace following an +unknown bareword was marked by the tokenizer as a code block brace rather than +a hash brace. This can cause poor formatting. The problem was solved by +having the tokenizer send a signal to the formatter if a block type was +guessed. The formatter has more information and can fix the problem. This +fixes case b1085. + +11 Apr 2021. + =item B Random testing produced some cases where welded quotes were not converging. @@ -12,7 +23,7 @@ to a new common sub. This update fixes cases b1066 b1067 b1071 b1079 b1080. -10 Apr 2021. +10 Apr 2021, 5d73dd5. =item B -- 2.39.5