From: Steve Hancock Date: Fri, 19 May 2023 16:27:00 +0000 (-0700) Subject: optimization of some new code X-Git-Tag: 20230309.03~21 X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=4be94c00133b8358a59c1fc68c9c45655bbb299f;p=perltidy.git optimization of some new code --- diff --git a/CHANGES.md b/CHANGES.md index d188b780..d309d161 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,15 @@ ## 2023 03 09.02 + - Some rare, minor issues with continuation indentation have been fixed. + Most scripts will remain unchanged. The main change is that block + comments which occur just before a closing brace, bracket or paren + now have an indentation which is independent of the existence of + an optional comma or semicolon. Previously, adding or deleting + an optional trailing comma could cause their indentation to jump. + Also, indentation of comments within ternary statements has been + improved. + - Issue git #118. A warning will be issued if a duplicate format-skipping starting marker is seen within a format-skipping section. The same applies to duplicate code-skipping starting markers within code-skipping diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm index 51012ddb..44d6a562 100644 --- a/lib/Perl/Tidy/Formatter.pm +++ b/lib/Perl/Tidy/Formatter.pm @@ -6732,6 +6732,8 @@ sub set_ci { my $last_type = $type; my $ci_last = 0; + my $ci_next_next = 1; + my $rstack = (); # - note that ci_default = 0 only for 'List' @@ -6743,7 +6745,7 @@ sub set_ci { _ci_close => 0, _ci_close_next => 0, _container_type => 'Block', - _ci_next_next => 1, + _ci_next_next => $ci_next_next, _comma_count => 0, _redo_list => undef, _Kc => undef, @@ -6830,14 +6832,13 @@ sub set_ci { # First guess at next value uses the stored default # which is 0 for logical containers, 1 for other containers: - $ci_next = $rparent->{_ci_next_next}; + $ci_next = $ci_next_next; # We will change these ci values necessary for special cases... 
- #------------------------------- - # Handle certain specific tokens - #------------------------------- - + #------- + # Blanks + #------- # For blanks, the ci should not be important, # but to match existing code a rule for blanks seems to be: # A blank after closing token has same ci as previous token, @@ -6849,128 +6850,21 @@ sub set_ci { $ci_this = $ci_last; } $rtoken_K->[_CI_LEVEL_] = $ci_this; - next; - } - # Handle a comment - elsif ( $type eq '#' ) { - - # If at '#' in ternary before a ? or :, use that level to make - # the comment line up with the next ? or : line. (see c202/t052) - # i.e. if a nested ? follows, we increase the '#' level by 1, and - # if a nested : follows, we decrease the '#' level by 1. - # This is the only place where this sub changes a _LEVEL_ value. - my $Kn; - my $parent_container_type = $rparent->{_container_type}; - if ( $parent_container_type eq 'Ternary' ) { - $Kn = $self->K_next_code($KK); - if ($Kn) { - my $type_kn = $rLL->[$Kn]->[_TYPE_]; - if ( $is_ternary{$type_kn} ) { - my $level_KK = $rLL->[$KK]->[_LEVEL_]; - my $level_Kn = $rLL->[$Kn]->[_LEVEL_]; - $rLL->[$KK]->[_LEVEL_] = $rLL->[$Kn]->[_LEVEL_]; - } - } - } - - # Undo ci for a block comment followed by a closing token or , or ; - # provided that the parent container: - # - ends without ci, or - # - starts ci=0 and is a comma list or this follows a closing type - # - has a level jump - if ( - $ci_this - && ( - !$rparent->{_ci_close} - || ( - !$rparent->{_ci_open_next} - && ( $rparent->{_comma_count} - || $is_closing_type{$last_type} ) - ) - ) - ) - { - # Be sure this is a block comment - my $lx = $rtoken_K->[_LINE_INDEX_]; - my $rK_range = $rlines->[$lx]->{_rK_range}; - my $Kfirst; - if ($rK_range) { $Kfirst = $rK_range->[0] } - if ( defined($Kfirst) && $Kfirst == $KK ) { - - # Look for trailing closing token - # [ and possibly ',' or ';' ] - $Kn = $self->K_next_code($KK) if ( !$Kn ); - my $Kc = $rparent->{_Kc}; - if ( - $Kn - && $Kc - && ( - $Kn == $Kc - - # only look 
for comma if -wbb=',' is set - # to minimize changes to existing formatting - || ( $rLL->[$Kn]->[_TYPE_] eq ',' - && $want_break_before_comma - && $parent_container_type eq 'List' ) - - # do not look ahead for a bare ';' because - # it changes old formatting with little benefit. -## || ( $rLL->[$Kn]->[_TYPE_] eq ';' -## && $parent_container_type eq 'Block' ) - ) - ) - { - - # Be sure container has a level jump - my $level_KK = $rLL->[$KK]->[_LEVEL_]; - my $level_Kc = $rLL->[$Kc]->[_LEVEL_]; - if ( $level_Kc < $level_KK ) { - $ci_this = 0; - } - } - } - } - - $ci_next = $ci_this; - $rtoken_K->[_CI_LEVEL_] = $ci_this; + # 'next' to avoid saving last_ values for blanks and commas next; } - # A comma and the subsequent item normally have ci undone - # unless ci has been set at a lower level - elsif ( $type eq ',' ) { - - if ( $rparent->{_container_type} eq 'List' ) { - $ci_this = $ci_next = $rparent->{_ci_open_next}; - } - $rparent->{_comma_count}++; - } - - # The next token after a ';' and label (type 'J') starts a new stmt - # The ci after a C-style for ';' (type 'f') is handled similarly. - # TODO: There is type 'f' redundant coding in sub respace which can - # be removed if this becomes the standard routine for computing ci. - elsif ( $type eq ';' || $type eq 'J' || $type eq 'f' ) { - $ci_next = 0; - if ( $is_closing_type{$last_type} ) { $ci_this = $ci_last } - } - - # Undo ci after a format statement - elsif ( $type eq 'k' ) { - if ( substr( $token, 0, 6 ) eq 'format' ) { - $ci_next = 0; - } - } + #-------------------- + # Container tokens... + #-------------------- + elsif ( $rtoken_K->[_TYPE_SEQUENCE_] ) { - #--------------------------- - # Handle container tokens... 
- #--------------------------- - elsif ( my $seqno = $rtoken_K->[_TYPE_SEQUENCE_] ) { + my $seqno = $rtoken_K->[_TYPE_SEQUENCE_]; - #------------------------ - # Opening container token - #------------------------ + #------------------------- + # Opening container tokens + #------------------------- if ( $is_opening_sequence_token{$token} ) { my $level = $rtoken_K->[_LEVEL_]; @@ -6979,14 +6873,33 @@ sub set_ci { # Default ci values for the closing token, to be modified # as necessary: my $ci_close = $ci_next; - my $ci_close_next = $rparent->{_ci_next_next}; + my $ci_close_next = $ci_next_next; my $Kc = $type eq '?' ? $K_closing_ternary->{$seqno} : $K_closing_container->{$seqno}; - my $Kcn = $self->K_next_code($Kc); - my $Kn = $self->K_next_nonblank($KK); + + ##my $Kn = $self->K_next_nonblank($KK); + my $Kn; + if ( $KK < $Klimit ) { + $Kn = $KK + 1; + if ( $rLL->[$Kn]->[_TYPE_] eq 'b' && $Kn < $Klimit ) { + $Kn += 1; + } + } + + ##my $Kcn = $self->K_next_code($Kc); + my $Kcn; + if ( $Kc && $Kc < $Klimit ) { + $Kcn = $Kc + 1; + if ( $rLL->[$Kcn]->[_TYPE_] eq 'b' && $Kcn < $Klimit ) { + $Kcn += 1; + } + if ( $rLL->[$Kcn]->[_TYPE_] eq '#' ) { + $Kcn = $self->K_next_code($Kcn); + } + } my $opening_level_jump = $Kn ? $rLL->[$Kn]->[_LEVEL_] - $level : 0; @@ -7047,7 +6960,8 @@ sub set_ci { elsif ( $last_type eq '!' 
) { $ci_this = $ci_last } } - my $ci_next_next = 1; + # initialize ci_next_next to its standard value + $ci_next_next = 1; my $block_type = $rblock_type_of_seqno->{$seqno}; $block_type = EMPTY_STRING unless ($block_type); @@ -7238,9 +7152,9 @@ sub set_ci { }; } - #------------------------ - # Closing container token - #------------------------ + #------------------------- + # Closing container tokens + #------------------------- else { my $seqno_test = $rparent->{_seqno}; if ( $seqno_test ne $seqno ) { @@ -7265,7 +7179,8 @@ sub set_ci { my $ci_open_old = $rparent->{_ci_open}; if ( @{$rstack} ) { - $rparent = pop @{$rstack}; + $rparent = pop @{$rstack}; + $ci_next_next = $rparent->{_ci_next_next}; } else { @@ -7286,6 +7201,130 @@ sub set_ci { } } + #--------- + # Comments + #--------- + elsif ( $type eq '#' ) { + + # If at '#' in ternary before a ? or :, use that level to make + # the comment line up with the next ? or : line. (see c202/t052) + # i.e. if a nested ? follows, we increase the '#' level by 1, and + # if a nested : follows, we decrease the '#' level by 1. + # This is the only place where this sub changes a _LEVEL_ value. 
+ my $Kn; + my $parent_container_type = $rparent->{_container_type}; + if ( $parent_container_type eq 'Ternary' ) { + $Kn = $self->K_next_code($KK); + if ($Kn) { + my $type_kn = $rLL->[$Kn]->[_TYPE_]; + if ( $is_ternary{$type_kn} ) { + my $level_KK = $rLL->[$KK]->[_LEVEL_]; + my $level_Kn = $rLL->[$Kn]->[_LEVEL_]; + $rLL->[$KK]->[_LEVEL_] = $rLL->[$Kn]->[_LEVEL_]; + } + } + } + + # Undo ci for a block comment followed by a closing token or , or ; + # provided that the parent container: + # - ends without ci, or + # - starts ci=0 and is a comma list or this follows a closing type + # - has a level jump + if ( + $ci_this + && ( + !$rparent->{_ci_close} + || ( + !$rparent->{_ci_open_next} + && ( $rparent->{_comma_count} + || $is_closing_type{$last_type} ) + ) + ) + ) + { + # Be sure this is a block comment + my $lx = $rtoken_K->[_LINE_INDEX_]; + my $rK_range = $rlines->[$lx]->{_rK_range}; + my $Kfirst; + if ($rK_range) { $Kfirst = $rK_range->[0] } + if ( defined($Kfirst) && $Kfirst == $KK ) { + + # Look for trailing closing token + # [ and possibly ',' or ';' ] + $Kn = $self->K_next_code($KK) if ( !$Kn ); + my $Kc = $rparent->{_Kc}; + if ( + $Kn + && $Kc + && ( + $Kn == $Kc + + # only look for comma if -wbb=',' is set + # to minimize changes to existing formatting + || ( $rLL->[$Kn]->[_TYPE_] eq ',' + && $want_break_before_comma + && $parent_container_type eq 'List' ) + + # do not look ahead for a bare ';' because + # it changes old formatting with little benefit. 
+## || ( $rLL->[$Kn]->[_TYPE_] eq ';' +## && $parent_container_type eq 'Block' ) + ) + ) + { + + # Be sure container has a level jump + my $level_KK = $rLL->[$KK]->[_LEVEL_]; + my $level_Kc = $rLL->[$Kc]->[_LEVEL_]; + if ( $level_Kc < $level_KK ) { + $ci_this = 0; + } + } + } + } + + $ci_next = $ci_this; + $rtoken_K->[_CI_LEVEL_] = $ci_this; + + # 'next' to avoid saving last_ values for blanks and commas + next; + } + + #---------------------- + # Semicolons and Labels + #---------------------- + # The next token after a ';' and label (type 'J') starts a new stmt + # The ci after a C-style for ';' (type 'f') is handled similarly. + # TODO: There is type 'f' redundant coding in sub respace which can + # be removed if this becomes the standard routine for computing ci. + elsif ( $type eq ';' || $type eq 'J' || $type eq 'f' ) { + $ci_next = 0; + if ( $is_closing_type{$last_type} ) { $ci_this = $ci_last } + } + + #--------- + # Keywords + #--------- + # Undo ci after a format statement + elsif ( $type eq 'k' ) { + if ( substr( $token, 0, 6 ) eq 'format' ) { + $ci_next = 0; + } + } + + #------- + # Commas + #------- + # A comma and the subsequent item normally have ci undone + # unless ci has been set at a lower level + elsif ( $type eq ',' ) { + + if ( $rparent->{_container_type} eq 'List' ) { + $ci_this = $ci_next = $rparent->{_ci_open_next}; + } + $rparent->{_comma_count}++; + } + #----------------------------------------------------------------- # Development test: ci_this should match the ci from the tokenizer # except where the new value makes an improvement. @@ -7335,6 +7374,10 @@ EOM } + #-------------- + # End main loop + #-------------- + # if the logfile is saved, we need to save the leading ci of # each old line of code. 
if ( $self->[_save_logfile_] ) { @@ -26986,7 +27029,7 @@ EOM # it is any specially marked side comment ( defined($KK) && $self->[_rspecial_side_comment_type_]->{$KK} ) - # or it is a static side comment + # or it is a static side comment || ( $rOpts->{'static-side-comments'} && $token =~ /$static_side_comment_pattern/ ) diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm index 4f97714f..ddc9f375 100644 --- a/lib/Perl/Tidy/Tokenizer.pm +++ b/lib/Perl/Tidy/Tokenizer.pm @@ -6036,7 +6036,7 @@ EOM my @type_sequence = (); # stack of output type sequence numbers my @tokens = (); # output tokens my @levels = (); # structural brace levels of output tokens - my @ci_string = (); # string needed to compute continuation indentation + my @ci_levels = (); $line_of_tokens->{_nesting_tokens_0} = $nesting_token_string; @@ -6176,7 +6176,6 @@ EOM #-------------------------------- # Store the values for this token #-------------------------------- - push( @ci_string, 0 ); push( @levels, $level_i ); push( @block_type, $routput_block_type->[$i] ); push( @type_sequence, $routput_type_sequence->[$i] ); @@ -6227,6 +6226,9 @@ EOM } } + # This sub returns zero ci values + if (@levels) { @ci_levels = (0) x $#levels } + #---------------------------------------------------------- # Wrap up this line of tokens for shipping to the Formatter #---------------------------------------------------------- @@ -6235,7 +6237,7 @@ EOM $line_of_tokens->{_rblock_type} = \@block_type; $line_of_tokens->{_rtype_sequence} = \@type_sequence; $line_of_tokens->{_rlevels} = \@levels; - $line_of_tokens->{_rci_levels} = \@ci_string; + $line_of_tokens->{_rci_levels} = \@ci_levels; return; } ## end sub tokenizer_wrapup_line_no_ci