From 570706a2483c753fc5ed5660117fc06694a584eb Mon Sep 17 00:00:00 2001 From: Steve Hancock Date: Wed, 26 Apr 2023 05:39:53 -0700 Subject: [PATCH] add sub set_ci This will be the new method for computing ci after testing. --- lib/Perl/Tidy/Formatter.pm | 471 +++++++++++++++++++++++++++++++++++++ 1 file changed, 471 insertions(+) diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm index 9e87060f..dfc9d996 100644 --- a/lib/Perl/Tidy/Formatter.pm +++ b/lib/Perl/Tidy/Formatter.pm @@ -6015,6 +6015,9 @@ EOM # Verify that the line hash does not have any unknown keys. $self->check_line_hashes() if (DEVEL_MODE); + # Experimental new ci calculation + $self->set_ci(); + { # Make a pass through all tokens, adding or deleting any whitespace as # required. Also make any other changes, such as adding semicolons. @@ -6655,6 +6658,474 @@ sub dump_block_summary { return; } ## end sub dump_block_summary +sub set_ci { + + # Set the basic continuation indentation (ci) for all tokens. + # This is an experimental routine which will eventually replace + # the ci values computed by the tokenizer. + + use constant TEST_NEW_CI => 0; + use constant DEBUG_SET_CI => 0; + + return unless (TEST_NEW_CI); + + #--------------------------------------------------------------------------- + ## FIXME: This is also in break_lists; might become a global constant + my %is_logical_container; + + # Removed ? : to fix t007 and others + ##my @q = qw# if elsif unless while and or err not && | || ? : ! #; + my @q = qw# if elsif unless while and or err not && | || ! #; + @is_logical_container{@q} = (1) x scalar(@q); + + # CAUTION: using differnt hash than in tokenizer here, but same name: + my %is_container_label_type; + ## From tokenizer ???@q = qw( k => && || ? : . ); + ## Need to include '!' + ## What about placing '.' in logical container + @q = qw# k && | || ? : ! #; + @is_container_label_type{@q} = (1) x scalar(@q); + + #--------------------------------------------------------------------------- + + # Trying to match old version for t027 + # add = for t015 + # FIXME: See @value_requestor_type for more that might be included + my %bin_op_type; + @q = qw# . ** -> + - / * = != ^ #; + @bin_op_type{@q} = (1) x scalar(@q); + + my ($self) = @_; + my $rLL = $self->[_rLL_]; + my $Klimit = $self->[_Klimit_]; + return unless defined($Klimit); + + my $token = ';'; + my $type = ';'; + my $level = 0; + my $seqno = EMPTY_STRING; + my $ci = 0; + my $ci_next = 0; + + my $last_token = $token; + my $last_type = $type; + my $last_level = $level; + my $last_seqno = $seqno; + my $ci_last = $ci; + + my $rstack; + + # TODO: + # - note that ci_default = 0 only for 'List' + my $seq_root = SEQ_ROOT; + push @{$rstack}, + { + _seqno => $seq_root, + _ci_open => 0, + _ci_open_next => 0, + _ci_close => 0, + _ci_close_next => 0, + _container_type => 'Block', + _ci_default => 1, + _in_ci => 0, + _keep_ci => 0, + _has_comma => 0, + _Kc => undef, + }; + + DEBUG_SET_CI + && print STDERR <[_rblock_type_of_seqno_]; + my $ris_sub_block = $self->[_ris_sub_block_]; + my $ris_asub_block = $self->[_ris_asub_block_]; + my $K_opening_container = $self->[_K_opening_container_]; + my $K_closing_container = $self->[_K_closing_container_]; + + my $map_block_follows = sub { + + # return true if a sort/map/etc block follows the closing brace + # of container $seqno + my ($seqno) = @_; + my $Kc = $K_closing_container->{$seqno}; + return unless defined($Kc); + my $Kcn = $self->K_next_code($Kc); + return unless defined($Kcn); + my $seqno_n = $rLL->[$Kcn]->[_TYPE_SEQUENCE_]; + return if ( defined($seqno_n) ); + my $Knn = $self->K_next_code($Kcn); + return unless defined($Knn); + my $seqno_nn = $rLL->[$Knn]->[_TYPE_SEQUENCE_]; + return unless defined($seqno_nn); + return unless $K_opening_container->{$seqno_nn} == $Knn; + my $block_type = $rblock_type_of_seqno->{$seqno_nn}; + + if ($block_type) { + return $is_block_with_ci{$block_type}; + } + return; + }; + + my $KK_last = 0; + my $K_start_statement = + $rLL->[0]->[_TYPE_] eq '#' ? $self->K_next_code(0) : 0; + foreach my $KK ( 0 .. $Klimit ) { + my $item = $rLL->[$KK]; + + $type = $item->[_TYPE_]; + $token = $item->[_TOKEN_]; + $level = $item->[_LEVEL_]; + $seqno = $item->[_TYPE_SEQUENCE_]; + $ci = $item->[_CI_LEVEL_]; + + # PATCH to ignore ci>1 from tokenizer when making comparisons + if ( $ci > 1 ) { $ci = 1 } + + my $lno = $item->[_LINE_INDEX_] + 1; + + # Definitions: + # $ci_this = the ci for this token + # $ci_next = the ci for the next token + # $ci_default = the default ci for this container + my $container_type = EMPTY_STRING; + + # Normally we use the ci value value set by previous token. + my $ci_this = $ci_next; + + # First guess at next value uses the stored default + # which is 0 for logical containers, 1 for other containers: + $ci_next = $rstack->[-1]->{_ci_default}; + + # We will change these two ci values necessary for special cases... + + #------------------------------- + # Handle certain specific tokens + #------------------------------- + + # Handle a comment + if ( $type eq '#' ) { $ci_next = $ci_this } + + # For blanks, the ci should not be important, + # but to match existing code a rule for blanks seems to be: + # A blank after closing token has same ci as previous token, + # Otherwise a blank has same ci as next token; + elsif ( $type eq 'b' ) { + + $ci_next = $ci_this; + if ( $is_closing_type{$last_type} ) { + $ci_this = $ci_last; + } + } + + # A comma and the subsequent item normally have ci undone + # unless ci has been set at a lower level + elsif ( $type eq ',' ) { + + if ( $rstack->[-1]->{_container_type} eq 'List' ) { + $ci_this = $ci_next = $rstack->[-1]->{_in_ci}; + } + $rstack->[-1]->{_has_comma} = 1; + } + + # The next token after a ';' and label (type 'J') starts a new stmt + # The ci after a C-style for ';' (type 'f') is handled similarly. + # TODO: There is redundant coding in sub respace which can be + # removed if this becomes the standard routine for computing ci. + elsif ( $type eq ';' || $type eq 'J' || $type eq 'f' ) { + $ci_next = 0; + $K_start_statement = $self->K_next_code($KK); + } + + # Undo ci after a format statement + elsif ( $type eq 'k' && substr( $token, 0, 6 ) eq 'format' ) { + $ci_next = 0; + } + + #--------------------------- + # Handle container tokens... + #--------------------------- + elsif ($seqno) { + + #------------------------ + # Opening container token + #------------------------ + if ( $is_opening_sequence_token{$token} ) { + + # Default ci values for the closing token, to be modified + # as necessary: + my $ci_close = $ci_next; + my $ci_close_next = $rstack->[-1]->{_ci_default}; + + my $level_next = $level; + my $Kn = $self->K_next_nonblank($KK); + if ( defined($Kn) ) { + $level_next = $rLL->[$Kn]->[_LEVEL_]; + } + my $Kc = $K_closing_container->{$seqno}; + my $Kcn = $self->K_next_code($Kc); + + my $opening_level_jump = $level_next - $level; + + # Determine the container type... + my $is_logical = + ## k => && || ? : . + $is_container_label_type{$last_type} + + && $is_logical_container{$last_token}; + + if ( $token eq '(' ) { + + # 'foreach' and 'for' paren contents are treated as logical + if ( $last_type eq 'k' ) { + $is_logical ||= + $last_token eq 'for' || $last_token eq 'foreach'; + } + + # Check for 'for' and 'foreach' loops with iterators + # FIXME: should make a sub to check this more carefully + elsif ($last_type eq 'i' + && $K_start_statement + && $KK - $K_start_statement <= 6 ) + { + my $type_1 = $rLL->[$K_start_statement]->[_TYPE_]; + my $token_1 = $rLL->[$K_start_statement]->[_TOKEN_]; + my $level_1 = $rLL->[$K_start_statement]->[_LEVEL_]; + $is_logical ||= + $type_1 eq 'k' + && $level == $level_1 + && ( $token_1 eq 'for' || $token_1 eq 'foreach' ); + } + elsif ( $last_token eq '(' ) { + $is_logical ||= + $rstack->[-1]->{_container_type} eq 'Logical'; + } + } + + my $ci_default = 1; + + my $block_type = $rblock_type_of_seqno->{$seqno}; + $block_type = EMPTY_STRING unless ($block_type); + + # Default: ci of first item of list with level jump is same as + # ci of first item of container + if ( $opening_level_jump > 0 ) { + $ci_next = $rstack->[-1]->{_ci_open_next}; + } + + # Block; or an opening brace in the star + if ($block_type) { + $container_type = 'Block'; + + # default is zero ci for closing paren + $ci_close = 0; + + # block types sort/map/etc use zero ci at terminal + # brace if previous keyword had zero ci. This will + # cause sort/map/grep filter blocks to line up. + if ( $is_block_with_ci{$block_type} ) { + if ( $map_block_follows->($seqno) ) { + if ($ci_last) { + $ci_close = $ci_this; + } + } + else { + $ci_close = $ci_this; + } + } + + $ci_this = 0; + $ci_next = 0; + $ci_close_next = $ci_close; + $K_start_statement = $self->K_next_code($KK); + } + + # Ternary + elsif ( $type eq '?' ) { + $container_type = 'Ternary'; + if ( $rstack->[-1]->{_container_type} eq 'List' + && !$rstack->[-1]->{_keep_ci} ) + { + $ci_this = 0; + $ci_close = 0; + } + } + + # Logical + elsif ($is_logical) { + $container_type = 'Logical'; + $ci_default = 0; + $ci_close_next = $ci_this; + } + + # List (or maybe just some grouping of terms) + else { + $container_type = 'List'; + + # lists not in blocks ... + if ( $rstack->[-1]->{_container_type} ne 'Block' ) { + if ( !$rstack->[-1]->{_has_comma} ) { + if ($opening_level_jump) { + $ci_close = $ci_this; + } + + # undo ci at binary op after right paren if no + # commas in container; fixes t027, t028 + if ( defined($Kcn) ) { + if ( $bin_op_type{ $rLL->[$Kcn]->[_TYPE_] } ) { + $ci_close_next = $ci_close; + } + } + } + + # Undo ci for block comment between a pair of closing + # tokens; fixes issue c022/t012; + my $Kc_parent = $rstack->[-1]->{_Kc}; + if ( $ci_close_next + && $rstack->[-1]->{_has_comma} + && $Kc_parent + && $Kc_parent == $Kcn ) + { + $ci_close_next = 0; + } + } + + # lists in blocks + else { + if ( $rstack->[-1]->{_container_type} eq 'Block' ) { + + # undo ci if another closing token follows + my $Kc = $K_closing_container->{$seqno}; + if ( defined($Kc) ) { + my $Kcn = $self->K_next_code($Kc); + if ( defined($Kcn) ) { + my $closing_level_jump = + $rLL->[$Kcn]->[_LEVEL_] - $level; + if ( $closing_level_jump < 0 ) { + $ci_close = $ci_this; + } + } + } + } + } + + if ( $rstack->[-1]->{_container_type} eq 'Ternary' ) { + $ci_next = 0; + } + } + + my $in_ci = $ci_next + && ( !$opening_level_jump + || !$rstack->[-1]->{_container_type} ne 'Block' ) ? 1 : 0; + + my $keep_ci = $ci_next && !$opening_level_jump; + + # check: closing ci must not be less than opening + if ( $ci_close < $ci_this ) { $ci_close = $ci_this } + + push @{$rstack}, + { + _seqno => $seqno, + _container_type => $container_type, + _ci_default => $ci_default, + _in_ci => $in_ci, + _ci_open => $ci_this, + _ci_open_next => $ci_next, + _ci_close => $ci_close, + _ci_close_next => $ci_close_next, + _keep_ci => $keep_ci, + _has_comma => 0, + _Kc => $Kc, + }; + } + + #------------------------ + # Closing container token + #------------------------ + else { + if ( @{$rstack} > 1 ) { + + # We just have to pull out the values set by the + # corresponding opening token + my $item = pop @{$rstack}; + my $seqno_test = $item->{_seqno}; + + $ci_this = $item->{_ci_close}; + $ci_next = $item->{_ci_close_next}; + + if ( $seqno_test ne $seqno ) { + + # Shouldn't happen if we are processing balanced text. + DEVEL_MODE + && Fault("stack error: $seqno_test != $seqno\n"); + } + + # The next token after certain closing block braces + # starts a new statement + my $block_type = $rblock_type_of_seqno->{$seqno}; + if ($block_type) { + if ( $is_block_without_semicolon{$block_type} + || $ris_sub_block->{$seqno} ) + { + $K_start_statement = $self->K_next_code($KK); + } + } + } + else { + + # Shouldn't happen if we are processing balanced text. + DEVEL_MODE && Fault("empty stack - shouldn't happen\n"); + } + } + } + + #----------------------- + # ci_this should match ci + #----------------------- + DEBUG_SET_CI && do { + my $tok = $token; + my $last_tok = $last_token; + $tok =~ s/\t//g; + $last_tok =~ s/\t//g; + $tok = length($tok) > 3 ? substr( $tok, 0, 8 ) : $tok; + $last_tok = + length($last_tok) > 3 ? substr( $last_tok, 0, 8 ) : $last_tok; + $tok =~ s/["']//g; + $last_tok =~ s/["']//g; + my $block_type; + $block_type = $rblock_type_of_seqno->{$seqno} if ($seqno); + $block_type = EMPTY_STRING unless ($block_type); + if ( !defined($block_type) ) { $block_type = EMPTY_STRING } + my $name = $container_type; + my $ptype = $rstack->[-1]->{_container_type}; + my $pname = $ptype; + my $error = + $ci_this == $ci ? EMPTY_STRING : $type eq 'b' ? "error" : "ERROR"; + my $in_ci = $rstack->[-1]->{_in_ci}; + print STDERR <[_CI_LEVEL_] = $ci_this; + } + + next if ( $type eq 'b' || $type eq '#' ); + + # Remember last nonblank, non-comment token info + $KK_last = $KK; + $ci_last = $ci_this; + $last_token = $token; + $last_type = $type; + $last_level = $level; + $last_seqno = $seqno; + + } + return; +} ## end sub set_ci + sub set_CODE_type { my ($self) = @_; -- 2.39.5