# Verify that the line hash does not have any unknown keys.
$self->check_line_hashes() if (DEVEL_MODE);
+ # Experimental new ci calculation
+ $self->set_ci();
+
{
# Make a pass through all tokens, adding or deleting any whitespace as
# required. Also make any other changes, such as adding semicolons.
return;
} ## end sub dump_block_summary
+sub set_ci {
+
+ # Set the basic continuation indentation (ci) for all tokens.
+ # This is an experimental routine which will eventually replace
+ # the ci values computed by the tokenizer.
+
+ use constant TEST_NEW_CI => 0;
+ use constant DEBUG_SET_CI => 0;
+
+ return unless (TEST_NEW_CI);
+
+ #---------------------------------------------------------------------------
+ ## FIXME: This is also in break_lists; might become a global constant
+ my %is_logical_container;
+
+ # Removed ? : to fix t007 and others
+ ##my @q = qw# if elsif unless while and or err not && | || ? : ! #;
+ my @q = qw# if elsif unless while and or err not && | || ! #;
+ @is_logical_container{@q} = (1) x scalar(@q);
+
+ # CAUTION: using differnt hash than in tokenizer here, but same name:
+ my %is_container_label_type;
+ ## From tokenizer ???@q = qw( k => && || ? : . );
+ ## Need to include '!'
+ ## What about placing '.' in logical container
+ @q = qw# k && | || ? : ! #;
+ @is_container_label_type{@q} = (1) x scalar(@q);
+
+ #---------------------------------------------------------------------------
+
+ # Trying to match old version for t027
+ # add = for t015
+ # FIXME: See @value_requestor_type for more that might be included
+ my %bin_op_type;
+ @q = qw# . ** -> + - / * = != ^ #;
+ @bin_op_type{@q} = (1) x scalar(@q);
+
+ my ($self) = @_;
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
+ return unless defined($Klimit);
+
+ my $token = ';';
+ my $type = ';';
+ my $level = 0;
+ my $seqno = EMPTY_STRING;
+ my $ci = 0;
+ my $ci_next = 0;
+
+ my $last_token = $token;
+ my $last_type = $type;
+ my $last_level = $level;
+ my $last_seqno = $seqno;
+ my $ci_last = $ci;
+
+ my $rstack;
+
+ # TODO:
+ # - note that ci_default = 0 only for 'List'
+ my $seq_root = SEQ_ROOT;
+ push @{$rstack},
+ {
+ _seqno => $seq_root,
+ _ci_open => 0,
+ _ci_open_next => 0,
+ _ci_close => 0,
+ _ci_close_next => 0,
+ _container_type => 'Block',
+ _ci_default => 1,
+ _in_ci => 0,
+ _keep_ci => 0,
+ _has_comma => 0,
+ _Kc => undef,
+ };
+
+ DEBUG_SET_CI
+ && print STDERR <<EOM;
+lno\tci\tci_this\tci_next\tlast_type\tlast_tok\tlast_seqno\tlast_level\ttype\ttok\tseqno\tlevel\tname\tpname\tin_ci\tblock_type\terror?
+EOM
+
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $ris_sub_block = $self->[_ris_sub_block_];
+ my $ris_asub_block = $self->[_ris_asub_block_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+
+ my $map_block_follows = sub {
+
+ # return true if a sort/map/etc block follows the closing brace
+ # of container $seqno
+ my ($seqno) = @_;
+ my $Kc = $K_closing_container->{$seqno};
+ return unless defined($Kc);
+ my $Kcn = $self->K_next_code($Kc);
+ return unless defined($Kcn);
+ my $seqno_n = $rLL->[$Kcn]->[_TYPE_SEQUENCE_];
+ return if ( defined($seqno_n) );
+ my $Knn = $self->K_next_code($Kcn);
+ return unless defined($Knn);
+ my $seqno_nn = $rLL->[$Knn]->[_TYPE_SEQUENCE_];
+ return unless defined($seqno_nn);
+ return unless $K_opening_container->{$seqno_nn} == $Knn;
+ my $block_type = $rblock_type_of_seqno->{$seqno_nn};
+
+ if ($block_type) {
+ return $is_block_with_ci{$block_type};
+ }
+ return;
+ };
+
+ my $KK_last = 0;
+ my $K_start_statement =
+ $rLL->[0]->[_TYPE_] eq '#' ? $self->K_next_code(0) : 0;
+ foreach my $KK ( 0 .. $Klimit ) {
+ my $item = $rLL->[$KK];
+
+ $type = $item->[_TYPE_];
+ $token = $item->[_TOKEN_];
+ $level = $item->[_LEVEL_];
+ $seqno = $item->[_TYPE_SEQUENCE_];
+ $ci = $item->[_CI_LEVEL_];
+
+ # PATCH to ignore ci>1 from tokenizer when making comparisons
+ if ( $ci > 1 ) { $ci = 1 }
+
+ my $lno = $item->[_LINE_INDEX_] + 1;
+
+ # Definitions:
+ # $ci_this = the ci for this token
+ # $ci_next = the ci for the next token
+ # $ci_default = the default ci for this container
+ my $container_type = EMPTY_STRING;
+
+ # Normally we use the ci value value set by previous token.
+ my $ci_this = $ci_next;
+
+ # First guess at next value uses the stored default
+ # which is 0 for logical containers, 1 for other containers:
+ $ci_next = $rstack->[-1]->{_ci_default};
+
+ # We will change these two ci values necessary for special cases...
+
+ #-------------------------------
+ # Handle certain specific tokens
+ #-------------------------------
+
+ # Handle a comment
+ if ( $type eq '#' ) { $ci_next = $ci_this }
+
+ # For blanks, the ci should not be important,
+ # but to match existing code a rule for blanks seems to be:
+ # A blank after closing token has same ci as previous token,
+ # Otherwise a blank has same ci as next token;
+ elsif ( $type eq 'b' ) {
+
+ $ci_next = $ci_this;
+ if ( $is_closing_type{$last_type} ) {
+ $ci_this = $ci_last;
+ }
+ }
+
+ # A comma and the subsequent item normally have ci undone
+ # unless ci has been set at a lower level
+ elsif ( $type eq ',' ) {
+
+ if ( $rstack->[-1]->{_container_type} eq 'List' ) {
+ $ci_this = $ci_next = $rstack->[-1]->{_in_ci};
+ }
+ $rstack->[-1]->{_has_comma} = 1;
+ }
+
+ # The next token after a ';' and label (type 'J') starts a new stmt
+ # The ci after a C-style for ';' (type 'f') is handled similarly.
+ # TODO: There is redundant coding in sub respace which can be
+ # removed if this becomes the standard routine for computing ci.
+ elsif ( $type eq ';' || $type eq 'J' || $type eq 'f' ) {
+ $ci_next = 0;
+ $K_start_statement = $self->K_next_code($KK);
+ }
+
+ # Undo ci after a format statement
+ elsif ( $type eq 'k' && substr( $token, 0, 6 ) eq 'format' ) {
+ $ci_next = 0;
+ }
+
+ #---------------------------
+ # Handle container tokens...
+ #---------------------------
+ elsif ($seqno) {
+
+ #------------------------
+ # Opening container token
+ #------------------------
+ if ( $is_opening_sequence_token{$token} ) {
+
+ # Default ci values for the closing token, to be modified
+ # as necessary:
+ my $ci_close = $ci_next;
+ my $ci_close_next = $rstack->[-1]->{_ci_default};
+
+ my $level_next = $level;
+ my $Kn = $self->K_next_nonblank($KK);
+ if ( defined($Kn) ) {
+ $level_next = $rLL->[$Kn]->[_LEVEL_];
+ }
+ my $Kc = $K_closing_container->{$seqno};
+ my $Kcn = $self->K_next_code($Kc);
+
+ my $opening_level_jump = $level_next - $level;
+
+ # Determine the container type...
+ my $is_logical =
+ ## k => && || ? : .
+ $is_container_label_type{$last_type}
+
+ && $is_logical_container{$last_token};
+
+ if ( $token eq '(' ) {
+
+ # 'foreach' and 'for' paren contents are treated as logical
+ if ( $last_type eq 'k' ) {
+ $is_logical ||=
+ $last_token eq 'for' || $last_token eq 'foreach';
+ }
+
+ # Check for 'for' and 'foreach' loops with iterators
+ # FIXME: should make a sub to check this more carefully
+ elsif ($last_type eq 'i'
+ && $K_start_statement
+ && $KK - $K_start_statement <= 6 )
+ {
+ my $type_1 = $rLL->[$K_start_statement]->[_TYPE_];
+ my $token_1 = $rLL->[$K_start_statement]->[_TOKEN_];
+ my $level_1 = $rLL->[$K_start_statement]->[_LEVEL_];
+ $is_logical ||=
+ $type_1 eq 'k'
+ && $level == $level_1
+ && ( $token_1 eq 'for' || $token_1 eq 'foreach' );
+ }
+ elsif ( $last_token eq '(' ) {
+ $is_logical ||=
+ $rstack->[-1]->{_container_type} eq 'Logical';
+ }
+ }
+
+ my $ci_default = 1;
+
+ my $block_type = $rblock_type_of_seqno->{$seqno};
+ $block_type = EMPTY_STRING unless ($block_type);
+
+ # Default: ci of first item of list with level jump is same as
+ # ci of first item of container
+ if ( $opening_level_jump > 0 ) {
+ $ci_next = $rstack->[-1]->{_ci_open_next};
+ }
+
+ # Block; or an opening brace in the star
+ if ($block_type) {
+ $container_type = 'Block';
+
+ # default is zero ci for closing paren
+ $ci_close = 0;
+
+ # block types sort/map/etc use zero ci at terminal
+ # brace if previous keyword had zero ci. This will
+ # cause sort/map/grep filter blocks to line up.
+ if ( $is_block_with_ci{$block_type} ) {
+ if ( $map_block_follows->($seqno) ) {
+ if ($ci_last) {
+ $ci_close = $ci_this;
+ }
+ }
+ else {
+ $ci_close = $ci_this;
+ }
+ }
+
+ $ci_this = 0;
+ $ci_next = 0;
+ $ci_close_next = $ci_close;
+ $K_start_statement = $self->K_next_code($KK);
+ }
+
+ # Ternary
+ elsif ( $type eq '?' ) {
+ $container_type = 'Ternary';
+ if ( $rstack->[-1]->{_container_type} eq 'List'
+ && !$rstack->[-1]->{_keep_ci} )
+ {
+ $ci_this = 0;
+ $ci_close = 0;
+ }
+ }
+
+ # Logical
+ elsif ($is_logical) {
+ $container_type = 'Logical';
+ $ci_default = 0;
+ $ci_close_next = $ci_this;
+ }
+
+ # List (or maybe just some grouping of terms)
+ else {
+ $container_type = 'List';
+
+ # lists not in blocks ...
+ if ( $rstack->[-1]->{_container_type} ne 'Block' ) {
+ if ( !$rstack->[-1]->{_has_comma} ) {
+ if ($opening_level_jump) {
+ $ci_close = $ci_this;
+ }
+
+ # undo ci at binary op after right paren if no
+ # commas in container; fixes t027, t028
+ if ( defined($Kcn) ) {
+ if ( $bin_op_type{ $rLL->[$Kcn]->[_TYPE_] } ) {
+ $ci_close_next = $ci_close;
+ }
+ }
+ }
+
+ # Undo ci for block comment between a pair of closing
+ # tokens; fixes issue c022/t012;
+ my $Kc_parent = $rstack->[-1]->{_Kc};
+ if ( $ci_close_next
+ && $rstack->[-1]->{_has_comma}
+ && $Kc_parent
+ && $Kc_parent == $Kcn )
+ {
+ $ci_close_next = 0;
+ }
+ }
+
+ # lists in blocks
+ else {
+ if ( $rstack->[-1]->{_container_type} eq 'Block' ) {
+
+ # undo ci if another closing token follows
+ my $Kc = $K_closing_container->{$seqno};
+ if ( defined($Kc) ) {
+ my $Kcn = $self->K_next_code($Kc);
+ if ( defined($Kcn) ) {
+ my $closing_level_jump =
+ $rLL->[$Kcn]->[_LEVEL_] - $level;
+ if ( $closing_level_jump < 0 ) {
+ $ci_close = $ci_this;
+ }
+ }
+ }
+ }
+ }
+
+ if ( $rstack->[-1]->{_container_type} eq 'Ternary' ) {
+ $ci_next = 0;
+ }
+ }
+
+ my $in_ci = $ci_next
+ && ( !$opening_level_jump
+ || !$rstack->[-1]->{_container_type} ne 'Block' ) ? 1 : 0;
+
+ my $keep_ci = $ci_next && !$opening_level_jump;
+
+ # check: closing ci must not be less than opening
+ if ( $ci_close < $ci_this ) { $ci_close = $ci_this }
+
+ push @{$rstack},
+ {
+ _seqno => $seqno,
+ _container_type => $container_type,
+ _ci_default => $ci_default,
+ _in_ci => $in_ci,
+ _ci_open => $ci_this,
+ _ci_open_next => $ci_next,
+ _ci_close => $ci_close,
+ _ci_close_next => $ci_close_next,
+ _keep_ci => $keep_ci,
+ _has_comma => 0,
+ _Kc => $Kc,
+ };
+ }
+
+ #------------------------
+ # Closing container token
+ #------------------------
+ else {
+ if ( @{$rstack} > 1 ) {
+
+ # We just have to pull out the values set by the
+ # corresponding opening token
+ my $item = pop @{$rstack};
+ my $seqno_test = $item->{_seqno};
+
+ $ci_this = $item->{_ci_close};
+ $ci_next = $item->{_ci_close_next};
+
+ if ( $seqno_test ne $seqno ) {
+
+ # Shouldn't happen if we are processing balanced text.
+ DEVEL_MODE
+ && Fault("stack error: $seqno_test != $seqno\n");
+ }
+
+ # The next token after certain closing block braces
+ # starts a new statement
+ my $block_type = $rblock_type_of_seqno->{$seqno};
+ if ($block_type) {
+ if ( $is_block_without_semicolon{$block_type}
+ || $ris_sub_block->{$seqno} )
+ {
+ $K_start_statement = $self->K_next_code($KK);
+ }
+ }
+ }
+ else {
+
+ # Shouldn't happen if we are processing balanced text.
+ DEVEL_MODE && Fault("empty stack - shouldn't happen\n");
+ }
+ }
+ }
+
+ #-----------------------
+ # ci_this should match ci
+ #-----------------------
+ DEBUG_SET_CI && do {
+ my $tok = $token;
+ my $last_tok = $last_token;
+ $tok =~ s/\t//g;
+ $last_tok =~ s/\t//g;
+ $tok = length($tok) > 3 ? substr( $tok, 0, 8 ) : $tok;
+ $last_tok =
+ length($last_tok) > 3 ? substr( $last_tok, 0, 8 ) : $last_tok;
+ $tok =~ s/["']//g;
+ $last_tok =~ s/["']//g;
+ my $block_type;
+ $block_type = $rblock_type_of_seqno->{$seqno} if ($seqno);
+ $block_type = EMPTY_STRING unless ($block_type);
+ if ( !defined($block_type) ) { $block_type = EMPTY_STRING }
+ my $name = $container_type;
+ my $ptype = $rstack->[-1]->{_container_type};
+ my $pname = $ptype;
+ my $error =
+ $ci_this == $ci ? EMPTY_STRING : $type eq 'b' ? "error" : "ERROR";
+ my $in_ci = $rstack->[-1]->{_in_ci};
+ print STDERR <<EOM;
+$lno\t$ci\t$ci_this\t$ci_next\t$last_type\t$last_tok\t$last_seqno\t$last_level\t$type\t$tok\t$seqno\t$level\t$name\t$pname\t$in_ci\t$block_type\t$error
+EOM
+ };
+
+ if (TEST_NEW_CI) {
+ $item->[_CI_LEVEL_] = $ci_this;
+ }
+
+ next if ( $type eq 'b' || $type eq '#' );
+
+ # Remember last nonblank, non-comment token info
+ $KK_last = $KK;
+ $ci_last = $ci_this;
+ $last_token = $token;
+ $last_type = $type;
+ $last_level = $level;
+ $last_seqno = $seqno;
+
+ }
+ return;
+} ## end sub set_ci
+
sub set_CODE_type {
my ($self) = @_;