use strict;
use warnings;
use Carp;
-our $VERSION = '20190601';
+our $VERSION = '20200110';
# The Tokenizer will be loaded with the Formatter
##use Perl::Tidy::Tokenizer; # for is_keyword()
@levels_to_go
@leading_spaces_to_go
@reduced_spaces_to_go
- @matching_token_to_go
@mate_index_to_go
@ci_levels_to_go
@nesting_depth_to_go
%is_anon_sub_1_brace_follower
%is_sort_map_grep
%is_sort_map_grep_eval
+ %want_one_line_block
%is_sort_map_grep_eval_do
%is_block_without_semicolon
%is_if_unless
$gnu_position_predictor = 0; # where the current token is predicted to be
$max_gnu_stack_index = 0;
$max_gnu_item_index = -1;
- $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
- @gnu_item_list = ();
- $last_output_indentation = 0;
- $last_indentation_written = 0;
- $last_unadjusted_indentation = 0;
- $last_leading_token = "";
+ $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
+ @gnu_item_list = ();
+ $last_output_indentation = 0;
+ $last_indentation_written = 0;
+ $last_unadjusted_indentation = 0;
+ $last_leading_token = "";
$last_output_short_opening_token = 0;
$saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
@summed_lengths_to_go = (); # line length to start of ith token
@token_lengths_to_go = ();
@levels_to_go = ();
- @matching_token_to_go = ();
@mate_index_to_go = ();
@ci_levels_to_go = ();
@nesting_depth_to_go = (0);
K_closing_container => {}, # for quickly traversing structure
K_opening_ternary => {}, # for quickly traversing structure
K_closing_ternary => {}, # for quickly traversing structure
+ rcontainer_map => {}, # hierarchical map of containers
rK_phantom_semicolons =>
undef, # for undoing phantom semicolons if iterating
rpaired_to_inner_container => {},
rbreak_container => {}, # prevent one-line blocks
+ rshort_nested => {}, # blocks not forced open
rvalid_self_keys => [], # for checking
valign_batch_count => 0,
};
my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
+ my $input_stream_name = $logger_object->get_input_stream_name();
Die(<<EOM);
==============================================================================
-Fault detected at line $line0 of sub '$subroutine1'
+While operating on input stream with name: '$input_stream_name'
+A fault was detected at line $line0 of sub '$subroutine1'
in file '$filename1'
which was called from line $line1 of sub '$subroutine2'
Message: '$msg'
# Scan all lines looking for runs of consecutive lines beginning with
# selected keywords. Example keywords are 'my', 'our', 'local', ... but
# they may be anything. We will set flags requesting that blanks be
- # inserted around and withing them according to input parameters. Note
+ # inserted around and within them according to input parameters. Note
# that we are scanning the lines as they came in in the input stream, so
# they are not necessarily well formatted.
next;
}
+ # Handle block comment to be deleted
+ elsif ( $CODE_type eq 'DEL' ) {
+ $self->flush();
+ next;
+ }
+
# Handle all other lines of code
$self->print_line_of_tokens($line_of_tokens);
}
# out of __END__ and __DATA__ sections, because
# the user may be using this section for any purpose whatsoever
if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
- if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; }
+ if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; }
if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
if ( !$skip_line
&& !$in_format_skipping_section
if ( $jmax >= 0 ) {
$Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
foreach my $j ( 0 .. $jmax ) {
+
+ # Clip negative nesting depths to zero to avoid problems.
+ # Negative values can occur in files with unbalanced containers
+ my $slevel = $rslevels->[$j];
+ if ( $slevel < 0 ) { $slevel = 0 }
+
my @tokary;
@tokary[
_TOKEN_, _TYPE_,
$rblock_type->[$j], $rcontainer_type->[$j],
$rcontainer_environment->[$j], $rtype_sequence->[$j],
$rlevels->[$j], $rlevels->[$j],
- $rslevels->[$j], $rci_levels->[$j],
+ $slevel, $rci_levels->[$j],
$input_line_no,
);
push @{$rLL}, \@tokary;
$binary_ws_rules{'t'}{'L'} = WS_NO;
$binary_ws_rules{'t'}{'{'} = WS_NO;
$binary_ws_rules{'}'}{'L'} = WS_NO;
- $binary_ws_rules{'}'}{'{'} = WS_NO;
+ $binary_ws_rules{'}'}{'{'} = WS_OPTIONAL; # RT#129850; was WS_NO
$binary_ws_rules{'$'}{'L'} = WS_NO;
$binary_ws_rules{'$'}{'{'} = WS_NO;
$binary_ws_rules{'@'}{'L'} = WS_NO;
# A sub to store one token in the new array
# All new tokens must be stored by this sub so that it can update
# all data structures on the fly.
- my $last_nonblank_type = ';';
- my $store_token = sub {
+ my $last_nonblank_type = ';';
+ my $last_nonblank_token = ';';
+ my $last_nonblank_block_type = '';
+ my $store_token = sub {
my ($item) = @_;
# This will be the index of this item in the new array
$item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
my $type = $item->[_TYPE_];
- if ( $type ne 'b' ) { $last_nonblank_type = $type }
+
+ # trim side comments
+ if ( $type eq '#' ) {
+ $item->[_TOKEN_] =~ s/\s*$//;
+ }
+
+ if ( $type && $type ne 'b' && $type ne '#' ) {
+ $last_nonblank_type = $type;
+ $last_nonblank_token = $item->[_TOKEN_];
+ $last_nonblank_block_type = $item->[_BLOCK_TYPE_];
+ }
# and finally, add this item to the new array
push @{$rLL_new}, $item;
# or we are deleting all whitespace
# Note that whitespace flag is a flag indicating whether a
# white space BEFORE the token is needed
- next if ( $KK >= $Kmax ); # skip terminal blank
+ next if ( $KK >= $Klast ); # skip terminal blank
my $Knext = $KK + 1;
my $ws = $rwhitespace_flags->[$Knext];
if ( $ws == -1
}
if ( $token =~ /$SUB_PATTERN/ ) {
+
+ # -spp = 0 : no space before opening prototype paren
+ # -spp = 1 : stable (follow input spacing)
+ # -spp = 2 : always space before opening prototype paren
+ my $spp = $rOpts->{'space-prototype-paren'};
+ if ( defined($spp) ) {
+ if ( $spp == 0 ) { $token =~ s/\s+\(/\(/; }
+ elsif ( $spp == 2 ) { $token =~ s/\(/ (/; }
+ }
+
+ # one space max, and no tabs
$token =~ s/\s+/ /g;
$rtoken_vars->[_TOKEN_] = $token;
}
# check a quote for problems
elsif ( $type eq 'Q' ) {
+ $check_Q->( $KK, $Kfirst );
+ }
+
+ # handle semicolons
+ elsif ( $type eq ';' ) {
+
+ # Remove unnecessary semicolons, but not after bare
+ # blocks, where it could be unsafe if the brace is
+ # mistokenized.
+ if (
+ $rOpts->{'delete-semicolons'}
+ && (
+ (
+ $last_nonblank_type eq '}'
+ && (
+ $is_block_without_semicolon{
+ $last_nonblank_block_type}
+ || $last_nonblank_block_type =~ /$SUB_PATTERN/
+ || $last_nonblank_block_type =~ /^\w+:$/ )
+ )
+ || $last_nonblank_type eq ';'
+ )
+ )
+ {
+
+ # This looks like a deletable semicolon, but even if a
+ # semicolon can be deleted it is necessarily best to do so.
+ # We apply these additional rules for deletion:
+ # - Always ok to delete a ';' at the end of a line
+ # - Never delete a ';' before a '#' because it would
+ # promote it to a block comment.
+ # - If a semicolon is not at the end of line, then only
+ # delete if it is followed by another semicolon or closing
+ # token. This includes the comment rule. It may take
+ # two passes to get to a final state, but it is a little
+ # safer. For example, keep the first semicolon here:
+ # eval { sub bubba { ok(0) }; ok(0) } || ok(1);
+ # It is not required but adds some clarity.
+ my $ok_to_delete = 1;
+ if ( $KK < $Klast ) {
+ my $Kn = $self->K_next_nonblank($KK);
+ if ( defined($Kn) && $Kn <= $Klast ) {
+ my $next_nonblank_token_type =
+ $rLL->[$Kn]->[_TYPE_];
+ $ok_to_delete = $next_nonblank_token_type eq ';'
+ || $next_nonblank_token_type eq '}';
+ }
+ }
- # This is ready to go but is commented out because there is
- # still identical logic in sub break_lines.
- # $check_Q->($KK, $Kfirst);
+ if ($ok_to_delete) {
+ note_deleted_semicolon();
+ next;
+ }
+ else {
+ write_logfile_entry("Extra ';'\n");
+ }
+ }
}
elsif ($type_sequence) {
}
}
-=pod
- # NOTE: This does not work yet. Version in print-line-of-tokens
- # is Still used until fixed
-
- # compare input/output indentation except for continuation lines
- # (because they have an unknown amount of initial blank space)
- # and lines which are quotes (because they may have been outdented)
- # Note: this test is placed here because we know the continuation flag
- # at this point, which allows us to avoid non-meaningful checks.
- my $structural_indentation_level = $rLL->[$Kfirst]->[_LEVEL_];
- compare_indentation_levels( $guessed_indentation_level,
- $structural_indentation_level )
- unless ( $rLL->[$Kfirst]->[_CI_LEVEL_] > 0
- || $guessed_indentation_level == 0
- && $rLL->[$Kfirst]->[_TYPE_] eq 'Q' );
-=cut
-
# Patch needed for MakeMaker. Do not break a statement
# in which $VERSION may be calculated. See MakeMaker.pm;
# this is based on the coding in it.
return;
}
+sub map_containers {
+
+ # Maps the container hierarchy
+ my $self = shift;
+ my $rLL = $self->{rLL};
+ return unless ( defined($rLL) && @{$rLL} );
+
+ my $K_opening_container = $self->{K_opening_container};
+ my $K_closing_container = $self->{K_closing_container};
+ my $rcontainer_map = $self->{rcontainer_map};
+
+ # loop over containers
+ my @stack; # stack of container sequence numbers
+ my $KNEXT = 0;
+ while ( defined($KNEXT) ) {
+ my $KK = $KNEXT;
+ $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
+ my $rtoken_vars = $rLL->[$KK];
+ my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ if ( !$type_sequence ) {
+ next if ( $KK == 0 ); # first token in file may not be container
+ Fault("sequence = $type_sequence not defined at K=$KK");
+ }
+
+ my $token = $rtoken_vars->[_TOKEN_];
+ if ( $is_opening_token{$token} ) {
+ if (@stack) {
+ $rcontainer_map->{$type_sequence} = $stack[-1];
+ }
+ push @stack, $type_sequence;
+ }
+ if ( $is_closing_token{$token} ) {
+ if (@stack) {
+ my $seqno = pop @stack;
+ if ( $seqno != $type_sequence ) {
+
+ # shouldn't happen unless file is garbage
+ }
+ }
+ }
+ }
+
+ # the stack should be empty for a good file
+ if (@stack) {
+
+ # unbalanced containers; file probably bad
+ }
+ else {
+ # ok
+ }
+ return;
+}
+
+sub mark_short_nested_blocks {
+
+ # This routine looks at the entire file and marks any short nested blocks
+ # which should not be broken. The results are stored in the hash
+ # $rshort_nested->{$type_sequence}
+ # which will be true if the container should remain intact.
+ #
+ # For example, consider the following line:
+
+ # sub cxt_two { sort { $a <=> $b } test_if_list() }
+
+ # The 'sort' block is short and nested within an outer sub block.
+ # Normally, the existance of the 'sort' block will force the sub block to
+ # break open, but this is not always desirable. Here we will set a flag for
+ # the sort block to prevent this. To give the user control, we will
+ # follow the input file formatting. If either of the blocks is broken in
+ # the input file then we will allow it to remain broken. Otherwise we will
+ # set a flag to keep it together in later formatting steps.
+
+ # The flag which is set here will be checked in two places:
+ # 'sub print_line_of_tokens' and 'sub starting_one_line_block'
+
+ my $self = shift;
+ my $rLL = $self->{rLL};
+ return unless ( defined($rLL) && @{$rLL} );
+
+ return unless ( $rOpts->{'one-line-block-nesting'} );
+
+ my $K_opening_container = $self->{K_opening_container};
+ my $K_closing_container = $self->{K_closing_container};
+ my $rbreak_container = $self->{rbreak_container};
+ my $rshort_nested = $self->{rshort_nested};
+ my $rcontainer_map = $self->{rcontainer_map};
+ my $rlines = $self->{rlines};
+
+ # Variables needed for estimating line lengths
+ my $starting_indent;
+ my $starting_lentot;
+ my $length_tol = 1;
+
+ my $excess_length_to_K = sub {
+ my ($K) = @_;
+
+ # Estimate the length from the line start to a given token
+ my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
+ my $excess_length =
+ $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
+ return ($excess_length);
+ };
+
+ my $is_broken_block = sub {
+
+ # a block is broken if the input line numbers of the braces differ
+ my ($seqno) = @_;
+ my $K_opening = $K_opening_container->{$seqno};
+ return unless ( defined($K_opening) );
+ my $K_closing = $K_closing_container->{$seqno};
+ return unless ( defined($K_closing) );
+ return $rbreak_container->{$seqno}
+ || $rLL->[$K_closing]->[_LINE_INDEX_] !=
+ $rLL->[$K_opening]->[_LINE_INDEX_];
+ };
+
+ # loop over all containers
+ my @open_block_stack;
+ my $iline = -1;
+ my $KNEXT = 0;
+ while ( defined($KNEXT) ) {
+ my $KK = $KNEXT;
+ $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
+ my $rtoken_vars = $rLL->[$KK];
+ my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ if ( !$type_sequence ) {
+ next if ( $KK == 0 ); # first token in file may not be container
+
+ # an error here is most likely due to a recent programming change
+ Fault("sequence = $type_sequence not defined at K=$KK");
+ }
+
+ # We are just looking at code blocks
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $type = $rtoken_vars->[_TYPE_];
+ next unless ( $type eq $token );
+ my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
+ next unless ($block_type);
+
+ # Keep a stack of all acceptable block braces seen.
+ # Only consider blocks entirely on one line so dump the stack when line
+ # changes.
+ my $iline_last = $iline;
+ $iline = $rLL->[$KK]->[_LINE_INDEX_];
+ if ( $iline != $iline_last ) { @open_block_stack = () }
+
+ if ( $token eq '}' ) {
+ if (@open_block_stack) { pop @open_block_stack }
+ }
+ next unless ( $token eq '{' );
+
+ # block must be balanced (bad scripts may be unbalanced)
+ my $K_opening = $K_opening_container->{$type_sequence};
+ my $K_closing = $K_closing_container->{$type_sequence};
+ next unless ( defined($K_opening) && defined($K_closing) );
+
+ # require that this block be entirely on one line
+ next if ( $is_broken_block->($type_sequence) );
+
+ # See if this block fits on one line of allowed length (which may
+ # be different from the input script)
+ $starting_lentot =
+ $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
+ $starting_indent = 0;
+ if ( !$rOpts_variable_maximum_line_length ) {
+ my $level = $rLL->[$KK]->[_LEVEL_];
+ $starting_indent = $rOpts_indent_columns * $level;
+ }
+
+ # Dump the stack if block is too long and skip this block
+ if ( $excess_length_to_K->($K_closing) > 0 ) {
+ @open_block_stack = ();
+ next;
+ }
+
+ # OK, Block passes tests, remember it
+ push @open_block_stack, $type_sequence;
+
+ # We are only marking nested code blocks,
+ # so check for a previous block on the stack
+ next unless ( @open_block_stack > 1 );
+
+ # Looks OK, mark this as a short nested block
+ $rshort_nested->{$type_sequence} = 1;
+
+ }
+ return;
+}
+
sub weld_containers {
# do any welding operations
# loop over structure items to find cuddled pairs
my $level = 0;
- my $KK = 0;
- while ( defined( $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] ) ) {
+ my $KNEXT = 0;
+ while ( defined($KNEXT) ) {
+ my $KK = $KNEXT;
+ $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
my $rtoken_vars = $rLL->[$KK];
my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
if ( !$type_sequence ) {
- Fault("sequence = $type_sequence not defined");
+ next if ( $KK == 0 ); # first token in file may not be container
+ Fault("sequence = $type_sequence not defined at K=$KK");
}
# We use the original levels because they get changed by sub
# Do not weld if this makes our line too long
$do_not_weld ||= $excess_length_to_K->($Kinner_opening) > 0;
+ # DO-NOT-WELD RULE 4; implemented for git#10:
+ # Do not weld an opening -ce brace if the next container is on a single
+ # line, different from the opening brace. (This is very rare). For
+ # example, given the following with -ce, we will avoid joining the {
+ # and [
+
+ # } else {
+ # [ $_, length($_) ]
+ # }
+
+ # because this would produce a terminal one-line block:
+
+ # } else { [ $_, length($_) ] }
+
+ # which may not be what is desired. But given this input:
+
+ # } else { [ $_, length($_) ] }
+
+ # then we will do the weld and retain the one-line block
+ if ( $rOpts->{'cuddled-else'} ) {
+ my $block_type = $rLL->[$Kouter_opening]->[_BLOCK_TYPE_];
+ if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) {
+ my $io_line = $inner_opening->[_LINE_INDEX_];
+ my $ic_line = $inner_closing->[_LINE_INDEX_];
+ my $oo_line = $outer_opening->[_LINE_INDEX_];
+ $do_not_weld ||=
+ ( $oo_line < $io_line && $ic_line == $io_line );
+ }
+ }
+
if ($do_not_weld) {
# After neglecting a pair, we start measuring from start of point io
};
# look for single qw quotes nested in containers
- my $KK = 0;
- while ( defined( $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] ) ) {
+ my $KNEXT = 0;
+ while ( defined($KNEXT) ) {
+ my $KK = $KNEXT;
+ $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
my $rtoken_vars = $rLL->[$KK];
my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
if ( !$outer_seqno ) {
- Fault("sequence = $outer_seqno not defined");
+ next if ( $KK == 0 ); # first token in file may not be container
+ Fault("sequence = $outer_seqno not defined at K=$KK");
}
my $token = $rtoken_vars->[_TOKEN_];
$line_of_tokens->{_line_text} =~ s/\s+$//;
}
$line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
+
+ # Deleting semicolons can create new empty code lines
+ # which should be marked as blank
+ if ( !defined($Kfirst) ) {
+ my $code_type = $line_of_tokens->{_code_type};
+ if ( !$code_type ) {
+ $line_of_tokens->{_code_type} = 'BL';
+ }
+ }
}
}
# remains fixed for the rest of this iteration.
$self->respace_tokens();
+ # Make a hierarchical map of the containers
+ $self->map_containers();
+
# Implement any welding needed for the -wn or -cb options
$self->weld_containers();
+ # Locate small nested blocks which should not be broken
+ $self->mark_short_nested_blocks();
+
# Finishes formatting and write the result to the line sink.
# Eventually this call should just change the 'rlines' data according to the
# new line breaks and then return so that we can do an internal iteration
# return length of tokens ($ibeg .. $iend) including $ibeg & $iend
# returns 0 if $ibeg > $iend (shouldn't happen)
my ( $ibeg, $iend ) = @_;
- return 0 if ( $iend < 0 || $ibeg > $iend );
+ return 0 if ( $iend < 0 || $ibeg > $iend );
return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 );
return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
}
write_logfile_entry(
" Last at input line $last_deleted_semicolon_at\n");
}
- write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n");
+ write_logfile_entry(" (Use -ndsm to prevent semicolon deletion)\n");
write_logfile_entry("\n");
}
}
}
+ make_sub_matching_pattern();
make_bli_pattern();
make_block_brace_vertical_tightness_pattern();
make_blank_line_pattern();
make_keyword_group_list_pattern();
+ # Make initial list of desired one line block types
+ # They will be modified by 'prepare_cuddled_block_types'
+ %want_one_line_block = %is_sort_map_grep_eval;
+
prepare_cuddled_block_types();
if ( $rOpts->{'dump-cuddled-block-list'} ) {
dump_cuddled_block_list(*STDOUT);
'?' => ':',
);
+ if ( $rOpts->{'ignore-old-breakpoints'} ) {
+ if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
+ Warn("Conflicting parameters: -iob and -bom; -bom will be ignored\n"
+ );
+ }
+ if ( $rOpts->{'break-at-old-comma-breakpoints'} ) {
+ Warn("Conflicting parameters: -iob and -boc; -boc will be ignored\n"
+ );
+ }
+
+ # Note: there are additional parameters that can be made inactive by
+ # -iob, but they are on by default so we would generate excessive
+ # warnings if we noted them. They are:
+ # $rOpts->{'break-at-old-keyword-breakpoints'}
+ # $rOpts->{'break-at-old-logical-breakpoints'}
+ # $rOpts->{'break-at-old-ternary-breakpoints'}
+ # $rOpts->{'break-at-old-attribute-breakpoints'}
+ }
+
# frequently used parameters
$rOpts_add_newlines = $rOpts->{'add-newlines'};
$rOpts_add_whitespace = $rOpts->{'add-whitespace'};
$word_count++;
$rcuddled_block_types->{$start}->{$word} =
1; #"$string_count.$word_count";
+
+ # git#9: Remove this word from the list of desired one-line
+ # blocks
+ $want_one_line_block{$word} = 0;
}
}
return;
return;
}
+sub make_sub_matching_pattern {
+
+ $SUB_PATTERN = '^sub\s+(::|\w)';
+ $ASUB_PATTERN = '^sub$';
+
+ if ( $rOpts->{'sub-alias-list'} ) {
+
+ # Note that any 'sub-alias-list' has been preprocessed to
+ # be a trimmed, space-separated list which includes 'sub'
+ # for example, it might be 'sub method fun'
+ my $sub_alias_list = $rOpts->{'sub-alias-list'};
+ $sub_alias_list =~ s/\s+/\|/g;
+ $SUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
+ $ASUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
+ }
+ return;
+}
+
sub make_bli_pattern {
if ( defined( $rOpts->{'brace-left-and-indent-list'} )
$container_environment_to_go[$max_index_to_go] = $container_environment;
$ci_levels_to_go[$max_index_to_go] = $ci_level;
$mate_index_to_go[$max_index_to_go] = -1;
- $matching_token_to_go[$max_index_to_go] = '';
$bond_strength_to_go[$max_index_to_go] = 0;
# Note: negative levels are currently retained as a diagnostic so that
return;
}
- sub insert_new_token_to_go {
-
- # insert a new token into the output stream. use same level as
- # previous token; assumes a character at max_index_to_go.
- my ( $self, @args ) = @_;
- save_current_token();
- ( $token, $type, $slevel, $no_internal_newlines ) = @args;
-
- if ( $max_index_to_go == UNDEFINED_INDEX ) {
- warning("code bug: bad call to insert_new_token_to_go\n");
- }
- $level = $levels_to_go[$max_index_to_go];
-
- # FIXME: it seems to be necessary to use the next, rather than
- # previous, value of this variable when creating a new blank (align.t)
- #my $slevel = $nesting_depth_to_go[$max_index_to_go];
- $ci_level = $ci_levels_to_go[$max_index_to_go];
- $container_environment = $container_environment_to_go[$max_index_to_go];
- $in_continued_quote = 0;
- $block_type = "";
- $type_sequence = "";
-
- # store an undef for the K value to catch unexpected usage
- # This routine is only called by add_closing_side_comments, and
- # eventually that call will be eliminated.
- $Ktoken_vars = undef;
-
- $self->store_token_to_go();
- restore_current_token();
- return;
- }
-
sub copy_hash {
my ($rold_token_hash) = @_;
my %new_token_hash =
my $rLL = $self->{rLL};
my $rbreak_container = $self->{rbreak_container};
+ my $rshort_nested = $self->{rshort_nested};
if ( !defined($K_first) ) {
- # Unexpected blank line..
- # Calling routine was supposed to handle this
- Warn(
-"Programming Error: Unexpected Blank Line in print_line_of_tokens. Ignoring"
- );
+ # Empty line: This can happen if tokens are deleted, for example
+ # with the -mangle parameter
return;
}
######################################
if ($is_comment) {
- if ( $rOpts->{'delete-block-comments'} ) { return }
-
if ( $rOpts->{'tee-block-comments'} ) {
$file_writer_object->tee_on();
}
return;
}
- # TODO: Move to sub scan_comments
# compare input/output indentation except for continuation lines
# (because they have an unknown amount of initial blank space)
# and lines which are quotes (because they may have been outdented)
- # Note: this test is placed here because we know the continuation flag
- # at this point, which allows us to avoid non-meaningful checks.
my $structural_indentation_level = $rinput_token_array->[0]->[_LEVEL_];
compare_indentation_levels( $guessed_indentation_level,
$structural_indentation_level )
if ( $type eq '#' ) {
- # trim trailing whitespace
- # (there is no option at present to prevent this)
- $token =~ s/\s*$//;
-
if (
$rOpts->{'delete-side-comments'}
$next_nonblank_token_type =
$rinput_token_array->[$j_next]->[_TYPE_];
- ######################
- # MAYBE MOVE ELSEWHERE?
- ######################
- if ( $type eq 'Q' ) {
- note_embedded_tab() if ( $token =~ "\t" );
-
- # make note of something like '$var = s/xxx/yyy/;'
- # in case it should have been '$var =~ s/xxx/yyy/;'
- if (
- $token =~ /^(s|tr|y|m|\/)/
- && $last_nonblank_token =~ /^(=|==|!=)$/
-
- # preceded by simple scalar
- && $last_last_nonblank_type eq 'i'
- && $last_last_nonblank_token =~ /^\$/
-
- # followed by some kind of termination
- # (but give complaint if we can's see far enough ahead)
- && $next_nonblank_token =~ /^[; \)\}]$/
-
- # scalar is not declared
- && !(
- $types_to_go[0] eq 'k'
- && $tokens_to_go[0] =~ /^(my|our|local)$/
- )
- )
- {
- my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
- complain(
-"Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"
- );
- }
- }
-
# Do not allow breaks which would promote a side comment to a
# block comment. In order to allow a break before an opening
# or closing BLOCK, followed by a side comment, those sections
( $type eq '{'
&& $token eq '{'
&& $block_type
+ && !$rshort_nested->{$type_sequence}
&& $block_type ne 't' );
my $is_closing_BLOCK =
( $type eq '}'
&& $token eq '}'
&& $block_type
+ && !$rshort_nested->{$type_sequence}
&& $block_type ne 't' );
if ( $side_comment_follows
destroy_one_line_block();
}
- # Remove unnecessary semicolons, but not after bare
- # blocks, where it could be unsafe if the brace is
- # mistokenized.
- if (
- (
- $last_nonblank_token eq '}'
- && (
- $is_block_without_semicolon{
- $last_nonblank_block_type}
- || $last_nonblank_block_type =~ /$SUB_PATTERN/
- || $last_nonblank_block_type =~ /^\w+:$/ )
- )
- || $last_nonblank_type eq ';'
- )
- {
-
- if (
- $rOpts->{'delete-semicolons'}
-
- # don't delete ; before a # because it would promote it
- # to a block comment
- && ( $next_nonblank_token_type ne '#' )
- )
- {
- note_deleted_semicolon();
- $self->output_line_to_go()
- unless ( $no_internal_newlines
- || $index_start_one_line_block != UNDEFINED_INDEX );
- next;
- }
- else {
- write_logfile_entry("Extra ';'\n");
- }
- }
$self->store_token_to_go();
$self->output_line_to_go()
};
# Do not end line in a weld
- # TODO: Move this fix into the routine?
- #my $jnb = $max_index_to_go;
- #if ( $jnb > 0 && $types_to_go[$jnb] eq 'b' ) { $jnb-- }
return if ( weld_len_right_to_go($max_index_to_go) );
# just set a tentative breakpoint if we might be in a one-line block
return;
}
-## my $cscw_block_comment;
-## $cscw_block_comment = $self->add_closing_side_comment()
-## if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
-
my $comma_arrow_count_contained = match_opening_and_closing_tokens();
# tell the -lp option we are outputting a batch so it can close
my $leading_type = $types_to_go[$imin];
# blank lines before subs except declarations and one-liners
- # MCONVERSION LOCATION - for sub tokenization change
if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
$want_blank = $rOpts->{'blank-lines-before-subs'}
- if (
- terminal_type( \@types_to_go, \@block_type_to_go, $imin,
- $imax ) !~ /^[\;\}]$/
- );
+ if ( $self->terminal_type_i( $imin, $imax ) !~ /^[\;\}]$/ );
}
# break before all package declarations
- # MCONVERSION LOCATION - for tokenizaton change
elsif ($leading_token =~ /^(package\s)/
&& $leading_type eq 'i' )
{
# break before certain key blocks except one-liners
if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
$want_blank = $rOpts->{'blank-lines-before-subs'}
- if (
- terminal_type( \@types_to_go, \@block_type_to_go, $imin,
- $imax ) ne '}'
- );
+ if ( $self->terminal_type_i( $imin, $imax ) ne '}' );
}
# Break before certain block types if we haven't had a
&& $lc >= $rOpts->{'long-block-line-count'}
&& consecutive_nonblank_lines() >=
$rOpts->{'long-block-line-count'}
- && (
- terminal_type( \@types_to_go, \@block_type_to_go, $imin,
- $imax ) ne '}'
- );
+ && $self->terminal_type_i( $imin, $imax ) ne '}';
}
# Check for blank lines wanted before a closing brace
else {
( $ri_first, $ri_last, my $colon_count ) =
- set_continuation_breaks($saw_good_break);
+ $self->set_continuation_breaks($saw_good_break);
- break_all_chain_tokens( $ri_first, $ri_last );
+ $self->break_all_chain_tokens( $ri_first, $ri_last );
break_equals( $ri_first, $ri_last );
recombine_breakpoints( $ri_first, $ri_last );
}
- insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
+ $self->insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
}
# do corrector step if -lp option is used
if ( $rOpts_one_line_block_semicolons == 0 ) {
$self->delete_one_line_semicolons( $ri_first, $ri_last );
}
- $self->send_lines_to_vertical_aligner( $ri_first, $ri_last,
- $do_not_pad );
+
+ # The line breaks for this batch of code have been finalized. Now we
+ # can to package the results for further processing. We will switch
+ # from the local '_to_go' buffer arrays (i-index) back to the global
+ # token arrays (K-index) at this point.
+ my $rlines_K;
+ my $index_error;
+ for ( my $n = 0 ; $n < @{$ri_first} ; $n++ ) {
+ my $ibeg = $ri_first->[$n];
+ my $Kbeg = $K_to_go[$ibeg];
+ my $iend = $ri_last->[$n];
+ my $Kend = $K_to_go[$iend];
+ if ( $iend - $ibeg != $Kend - $Kbeg ) {
+ $index_error = $n unless defined($index_error);
+ }
+ push @{$rlines_K},
+ [ $Kbeg, $Kend, $forced_breakpoint_to_go[$iend] ];
+ }
+
+ # Check correctness of the mapping between the i and K token indexes
+ if ( defined($index_error) ) {
+
+ # Temporary debug code - should never get here
+ for ( my $n = 0 ; $n < @{$ri_first} ; $n++ ) {
+ my $ibeg = $ri_first->[$n];
+ my $Kbeg = $K_to_go[$ibeg];
+ my $iend = $ri_last->[$n];
+ my $Kend = $K_to_go[$iend];
+ my $idiff = $iend - $ibeg;
+ my $Kdiff = $Kend - $Kbeg;
+ print STDERR <<EOM;
+line $n, irange $ibeg-$iend = $idiff, Krange $Kbeg-$Kend = $Kdiff;
+EOM
+ }
+ Fault("Index error at line $index_error; i and K ranges differ");
+ }
+
+ my $rbatch_hash = {
+ rlines_K => $rlines_K,
+ do_not_pad => $do_not_pad,
+ ibeg0 => $ri_first->[0],
+ };
+
+ $self->send_lines_to_vertical_aligner($rbatch_hash);
# Insert any requested blank lines after an opening brace. We have to
# skip back before any side comment to find the terminal token
prepare_for_new_input_lines();
-## # output any new -cscw block comment
-## if ($cscw_block_comment) {
-## $self->flush();
-## $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
-## }
return;
}
$first_deleted_semicolon_at = $last_deleted_semicolon_at;
}
$deleted_semicolon_count++;
- write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;)
+ write_logfile_entry("Deleted unnecessary ';' at line $input_line_number\n");
return;
}
my ( $self, $j, $jmax, $level, $slevel, $ci_level, $rtoken_array ) = @_;
my $rbreak_container = $self->{rbreak_container};
+ my $rshort_nested = $self->{rshort_nested};
my $jmax_check = @{$rtoken_array};
if ( $jmax_check < $jmax ) {
if ( $rtoken_array->[$i]->[_TYPE_] eq 'b' ) { $pos += 1 }
else { $pos += rtoken_length($i) }
+ # ignore some small blocks
+ my $type_sequence = $rtoken_array->[$i]->[_TYPE_SEQUENCE_];
+ my $nobreak = $rshort_nested->{$type_sequence};
+
# Return false result if we exceed the maximum line length,
if ( $pos > maximum_line_length($i_start) ) {
return 0;
}
- # or encounter another opening brace before finding the closing brace.
+ # keep going for non-containers
+ elsif ( !$type_sequence ) {
+
+ }
+
+ # return if we encounter another opening brace before finding the
+ # closing brace.
elsif ($rtoken_array->[$i]->[_TOKEN_] eq '{'
&& $rtoken_array->[$i]->[_TYPE_] eq '{'
- && $rtoken_array->[$i]->[_BLOCK_TYPE_] )
+ && $rtoken_array->[$i]->[_BLOCK_TYPE_]
+ && !$nobreak )
{
return 0;
}
# if we find our closing brace..
elsif ($rtoken_array->[$i]->[_TOKEN_] eq '}'
&& $rtoken_array->[$i]->[_TYPE_] eq '}'
- && $rtoken_array->[$i]->[_BLOCK_TYPE_] )
+ && $rtoken_array->[$i]->[_BLOCK_TYPE_]
+ && !$nobreak )
{
# be sure any trailing comment also fits on the line
# we keep old one-line blocks but do not form new ones. It is not
# always a good idea to make as many one-line blocks as possible,
# so other types are not done. The user can always use -mangle.
- if ( $is_sort_map_grep_eval{$block_type} ) {
+ if ( $want_one_line_block{$block_type} ) {
create_one_line_block( $i_start, 1 );
}
return 0;
# map { $_, $lookup->{$_} }
# sort { $a <=> $b }
# grep { $lookup->{$_} ne $default } keys %$lookup );
- my ( $ri_first, $ri_last ) = @_;
+ my ( $self, $ri_first, $ri_last ) = @_;
my ( $line_1, $line_2, $lev_last );
my $this_line_is_semicolon_terminated;
my $max_line = @{$ri_first} - 1;
sub pad_token {
# insert $pad_spaces before token number $ipad
- my ( $ipad, $pad_spaces ) = @_;
+ my ( $self, $ipad, $pad_spaces ) = @_;
+ my $rLL = $self->{rLL};
if ( $pad_spaces > 0 ) {
$tokens_to_go[$ipad] = ' ' x $pad_spaces . $tokens_to_go[$ipad];
}
return;
}
+ # Keep token arrays in sync
+ $self->sync_token_K($ipad);
+
$token_lengths_to_go[$ipad] += $pad_spaces;
foreach my $i ( $ipad .. $max_index_to_go ) {
$summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
# &Error_OutOfRange;
# }
#
- my ( $ri_first, $ri_last ) = @_;
+ my ( $self, $ri_first, $ri_last ) = @_;
my $max_line = @{$ri_first} - 1;
# FIXME: move these declarations below
# find any unclosed container
next
unless ( $type_sequence_to_go[$i]
- && $mate_index_to_go[$i] > $iend );
+ && $self->mate_index_to_go($i) > $iend );
# find next nonblank token to pad
$ipad = $inext_to_go[$i];
my $i2 = $ri_last->[$l];
if ( $types_to_go[$i2] eq '#' ) {
my $i1 = $ri_first->[$l];
- next
- if (
- terminal_type( \@types_to_go, \@block_type_to_go,
- $i1, $i2 ) eq ','
- );
+ next if $self->terminal_type_i( $i1, $i2 ) eq ',';
}
}
if ( $pad_spaces == -1 ) {
if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' )
{
- pad_token( $ipad - 1, $pad_spaces );
+ $self->pad_token( $ipad - 1, $pad_spaces );
}
}
$pad_spaces = 0;
my $length_t = total_line_length( $ibeg, $iend );
if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) )
{
- pad_token( $ipad, $pad_spaces );
+ $self->pad_token( $ipad, $pad_spaces );
}
}
}
my $self = shift;
# add closing side comments after closing block braces if -csc used
- my $cscw_block_comment;
+ my ( $closing_side_comment, $cscw_block_comment );
#---------------------------------------------------------------
# Step 1: loop through all tokens of this line to accumulate
# ..and the corresponding opening brace must is not in this batch
# (because we do not need to tag one-line blocks, although this
# should also be caught with a positive -csci value)
- && $mate_index_to_go[$i_terminal] < 0
+ && $self->mate_index_to_go($i_terminal) < 0
# ..and either
&& (
}
# switch to the new csc (unless we deleted it!)
- $tokens_to_go[$max_index_to_go] = $token if $token;
+ if ($token) {
+ $tokens_to_go[$max_index_to_go] = $token;
+ $self->sync_token_K($max_index_to_go);
+ }
}
# handle case of NO existing closing side comment
else {
- # Remove any existing blank and add another below.
- # This is a tricky point. A side comment needs to have the same level
- # as the preceding closing brace or else the line will not get the right
- # indentation. So even if we have a blank, we are going to replace it.
- if ( $types_to_go[$max_index_to_go] eq 'b' ) {
- unstore_token_to_go();
- }
-
- # insert the new side comment into the output token stream
- my $type = '#';
- my $block_type = '';
- my $type_sequence = '';
- my $container_environment =
- $container_environment_to_go[$max_index_to_go];
- my $level = $levels_to_go[$max_index_to_go];
- my $slevel = $nesting_depth_to_go[$max_index_to_go];
- my $no_internal_newlines = 0;
-
- my $ci_level = $ci_levels_to_go[$max_index_to_go];
- my $in_continued_quote = 0;
-
- # insert a blank token
- $self->insert_new_token_to_go( ' ', 'b', $slevel,
- $no_internal_newlines );
-
- # then the side comment
- $self->insert_new_token_to_go( $token, $type, $slevel,
- $no_internal_newlines );
+ # To avoid inserting a new token in the token arrays, we
+ # will just return the new side comment so that it can be
+ # inserted just before it is needed in the call to the
+ # vertical aligner.
+ $closing_side_comment = $token;
}
}
- return $cscw_block_comment;
+ return ( $closing_side_comment, $cscw_block_comment );
}
sub previous_nonblank_token {
sub send_lines_to_vertical_aligner {
- my ( $self, $ri_first, $ri_last, $do_not_pad ) = @_;
+ my ( $self, $rbatch_hash ) = @_;
- my $valign_batch_number = $self->increment_valign_batch_count();
+ # This routine receives a batch of code for which the final line breaks
+ # have been defined. Here we prepare the lines for passing to the vertical
+ # aligner. We do the following tasks:
+ # - mark certain vertical alignment tokens tokens, such as '=', in each line.
+ # - make minor indentation adjustments
+ # - insert extra blank spaces to help display certain logical constructions
- my $cscw_block_comment;
- if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 ) {
- $cscw_block_comment = $self->add_closing_side_comment();
+ my $rlines_K = $rbatch_hash->{rlines_K};
+ if ( !@{$rlines_K} ) {
+ Fault("Unexpected call with no lines");
+ return;
+ }
+ my $n_last_line = @{$rlines_K} - 1;
+ my $do_not_pad = $rbatch_hash->{do_not_pad};
- # Add or update any closing side comment
- if ( $types_to_go[$max_index_to_go] eq '#' ) {
- $ri_last->[-1] = $max_index_to_go;
- }
+ my $rLL = $self->{rLL};
+ my $Klimit = $self->{Klimit};
+
+ my ( $Kbeg_next, $Kend_next ) = @{ $rlines_K->[0] };
+ my $type_beg_next = $rLL->[$Kbeg_next]->[_TYPE_];
+ my $token_beg_next = $rLL->[$Kbeg_next]->[_TOKEN_];
+ my $type_end_next = $rLL->[$Kend_next]->[_TYPE_];
+
+ # Construct indexes to the global_to_go arrays so that called routines can
+ # still access those arrays. This might eventually be removed
+ # when all called routines have been converted to access token values
+ # in the rLL array instead.
+ my $ibeg0 = $rbatch_hash->{ibeg0};
+ my $Kbeg0 = $Kbeg_next;
+ my ( $ri_first, $ri_last );
+ foreach my $rline ( @{$rlines_K} ) {
+ my ( $Kbeg, $Kend ) = @{$rline};
+ my $ibeg = $ibeg0 + $Kbeg - $Kbeg0;
+ my $iend = $ibeg0 + $Kend - $Kbeg0;
+ push @{$ri_first}, $ibeg;
+ push @{$ri_last}, $iend;
+ }
+ #####################################################################
+
+ my $valign_batch_number = $self->increment_valign_batch_count();
+
+ my ( $cscw_block_comment, $closing_side_comment );
+ if ( $rOpts->{'closing-side-comments'} ) {
+ ( $closing_side_comment, $cscw_block_comment ) =
+ $self->add_closing_side_comment();
}
my $rindentation_list = [0]; # ref to indentations for each line
- # define the array @matching_token_to_go for the output tokens
+ # define the array @{$ralignment_type_to_go} for the output tokens
# which will be non-blank for each special token (such as =>)
# for which alignment is required.
- set_vertical_alignment_markers( $ri_first, $ri_last );
-
- # flush if necessary to avoid unwanted alignment
- my $must_flush = 0;
- if ( @{$ri_first} > 1 ) {
+ my $ralignment_type_to_go =
+ $self->set_vertical_alignment_markers( $ri_first, $ri_last );
- # flush before a long if statement
- if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
- $must_flush = 1;
- }
- }
- if ($must_flush) {
+ # flush before a long if statement to avoid unwanted alignment
+ if ( $n_last_line > 0
+ && $type_beg_next eq 'k'
+ && $token_beg_next =~ /^(if|unless)$/ )
+ {
Perl::Tidy::VerticalAligner::flush();
}
- undo_ci( $ri_first, $ri_last );
+ $self->undo_ci( $ri_first, $ri_last );
- set_logical_padding( $ri_first, $ri_last );
+ $self->set_logical_padding( $ri_first, $ri_last );
# loop to prepare each line for shipment
- my $n_last_line = @{$ri_first} - 1;
my $in_comma_list;
+ my ( $Kbeg, $type_beg, $token_beg );
+ my ( $Kend, $type_end );
for my $n ( 0 .. $n_last_line ) {
- my $ibeg = $ri_first->[$n];
- my $iend = $ri_last->[$n];
-
- my ( $rtokens, $rfields, $rpatterns ) =
- make_alignment_patterns( $ibeg, $iend );
- # Set flag to show how much level changes between this line
- # and the next line, if we have it.
- my $ljump = 0;
+ my $ibeg = $ri_first->[$n];
+ my $iend = $ri_last->[$n];
+ my $rline = $rlines_K->[$n];
+ my $forced_breakpoint = $rline->[2];
+
+ # we may need to look at variables on three consecutive lines ...
+
+ # Some vars on line [n-1], if any:
+ my $Kbeg_last = $Kbeg;
+ my $type_beg_last = $type_beg;
+ my $token_beg_last = $token_beg;
+ my $Kend_last = $Kend;
+ my $type_end_last = $type_end;
+
+ # Some vars on line [n]:
+ $Kbeg = $Kbeg_next;
+ $type_beg = $type_beg_next;
+ $token_beg = $token_beg_next;
+ $Kend = $Kend_next;
+ $type_end = $type_end_next;
+
+ # We use two slightly different definitions of level jump at the end
+ # of line:
+ # $ljump is the level jump needed by 'sub set_adjusted_indentation'
+ # $level_jump is the level jump needed by the vertical aligner.
+ my $ljump = 0; # level jump at end of line
+
+ # Get some vars on line [n+1], if any:
if ( $n < $n_last_line ) {
- my $ibegp = $ri_first->[ $n + 1 ];
- $ljump = $levels_to_go[$ibegp] - $levels_to_go[$iend];
+ ( $Kbeg_next, $Kend_next ) =
+ @{ $rlines_K->[ $n + 1 ] };
+ $type_beg_next = $rLL->[$Kbeg_next]->[_TYPE_];
+ $token_beg_next = $rLL->[$Kbeg_next]->[_TOKEN_];
+ $type_end_next = $rLL->[$Kend_next]->[_TYPE_];
+ $ljump = $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
}
+ # level jump at end of line for the vertical aligner:
+ my $level_jump =
+ $Kend >= $Klimit
+ ? 0
+ : $rLL->[ $Kend + 1 ]->[_SLEVEL_] - $rLL->[$Kbeg]->[_SLEVEL_];
+
+ $self->delete_needless_alignments( $ibeg, $iend,
+ $ralignment_type_to_go );
+
+ my ( $rtokens, $rfields, $rpatterns ) =
+ $self->make_alignment_patterns( $ibeg, $iend,
+ $ralignment_type_to_go );
+
my ( $indentation, $lev, $level_end, $terminal_type,
$is_semicolon_terminated, $is_outdented_line )
= $self->set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
my $outdent_long_lines = (
# which are long quotes, if allowed
- ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
+ ( $type_beg eq 'Q' && $rOpts->{'outdent-long-quotes'} )
# which are long block comments, if allowed
|| (
- $types_to_go[$ibeg] eq '#'
+ $type_beg eq '#'
&& $rOpts->{'outdent-long-comments'}
# but not if this is a static block comment
)
);
- my $level_jump =
- $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
-
my $rvertical_tightness_flags =
- set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
+ $self->set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
$ri_first, $ri_last );
# flush an outdented line to avoid any unwanted vertical alignment
# );
#
my $is_terminal_ternary = 0;
- if ( $tokens_to_go[$ibeg] eq ':'
- || $n > 0 && $tokens_to_go[ $ri_last->[ $n - 1 ] ] eq ':' )
- {
- my $last_leading_type = ":";
- if ( $n > 0 ) {
- my $iprev = $ri_first->[ $n - 1 ];
- $last_leading_type = $types_to_go[$iprev];
- }
+
+ if ( $type_beg eq ':' || $n > 0 && $type_end_last eq ':' ) {
+ my $last_leading_type = $n > 0 ? $type_beg_last : ':';
if ( $terminal_type ne ';'
&& $n_last_line > $n
&& $level_end == $lev )
{
- my $inext = $ri_first->[ $n + 1 ];
- $level_end = $levels_to_go[$inext];
- $terminal_type = $types_to_go[$inext];
+ $level_end = $rLL->[$Kbeg_next]->[_LEVEL_];
+ $terminal_type = $rLL->[$Kbeg_next]->[_TYPE_];
}
+ if (
+ $last_leading_type eq ':'
+ && ( ( $terminal_type eq ';' && $level_end <= $lev )
+ || ( $terminal_type ne ':' && $level_end < $lev ) )
+ )
+ {
- $is_terminal_ternary = $last_leading_type eq ':'
- && ( ( $terminal_type eq ';' && $level_end <= $lev )
- || ( $terminal_type ne ':' && $level_end < $lev ) )
+ # the terminal term must not contain any ternary terms, as in
+ # my $ECHO = (
+ # $Is_MSWin32 ? ".\\echo$$"
+ # : $Is_MacOS ? ":echo$$"
+ # : ( $Is_NetWare ? "echo$$" : "./echo$$" )
+ # );
+ $is_terminal_ternary = 1;
- # the terminal term must not contain any ternary terms, as in
- # my $ECHO = (
- # $Is_MSWin32 ? ".\\echo$$"
- # : $Is_MacOS ? ":echo$$"
- # : ( $Is_NetWare ? "echo$$" : "./echo$$" )
- # );
- && !grep { /^[\?\:]$/ } @types_to_go[ $ibeg + 1 .. $iend ];
+ my $KP = $rLL->[$Kbeg]->[_KNEXT_SEQ_ITEM_];
+ while ( defined($KP) && $KP <= $Kend ) {
+ my $type_KP = $rLL->[$KP]->[_TYPE_];
+ if ( $type_KP eq '?' || $type_KP eq ':' ) {
+ $is_terminal_ternary = 0;
+ last;
+ }
+ $KP = $rLL->[$KP]->[_KNEXT_SEQ_ITEM_];
+ }
+ }
}
- # send this new line down the pipe
- my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
+ # add any new closing side comment to the last line
+ if ( $closing_side_comment && $n == $n_last_line && @{$rfields} ) {
+ $rfields->[-1] .= " $closing_side_comment";
+ }
+ # send this new line down the pipe
my $rvalign_hash = {};
- $rvalign_hash->{level} = $lev;
- $rvalign_hash->{level_end} = $level_end;
- $rvalign_hash->{indentation} = $indentation;
- $rvalign_hash->{is_forced_break} =
- $forced_breakpoint_to_go[$iend] || $in_comma_list;
+ $rvalign_hash->{level} = $lev;
+ $rvalign_hash->{level_end} = $level_end;
+ $rvalign_hash->{indentation} = $indentation;
+ $rvalign_hash->{is_forced_break} = $forced_breakpoint || $in_comma_list;
$rvalign_hash->{outdent_long_lines} = $outdent_long_lines;
$rvalign_hash->{is_terminal_ternary} = $is_terminal_ternary;
$rvalign_hash->{is_terminal_statement} = $is_semicolon_terminated;
Perl::Tidy::VerticalAligner::valign_input( $rvalign_hash, $rfields,
$rtokens, $rpatterns );
- $in_comma_list =
- $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
+ $in_comma_list = $type_end eq ',' && $forced_breakpoint;
# flush an outdented line to avoid any unwanted vertical alignment
Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
$last_output_short_opening_token
# line ends in opening token
- = $types_to_go[$iend] =~ /^[\{\(\[L]$/
+ = $type_end =~ /^[\{\(\[L]$/
# and either
&& (
# line has either single opening token
- $iend == $ibeg
+ $Kend == $Kbeg
# or is a single token followed by opening token.
# Note that sub identifiers have blanks like 'sub doit'
- || ( $iend - $ibeg <= 2 && $tokens_to_go[$ibeg] !~ /\s+/ )
+ || ( $Kend - $Kbeg <= 2 && $token_beg !~ /\s+/ )
)
# and limit total to 10 character widths
# remember indentation of lines containing opening containers for
# later use by sub set_adjusted_indentation
- save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
+ $self->save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
# output any new -cscw block comment
if ($cscw_block_comment) {
my %block_type_map;
my %keyword_map;
+ my %operator_map;
BEGIN {
# treat an 'undef' similar to numbers and quotes
'undef' => 'Q',
);
+
+ # map certain operators to the same class for pattern matching
+ %operator_map = (
+ '!~' => '=~',
+ '+=' => '+=',
+ '-=' => '+=',
+ '*=' => '+=',
+ '/=' => '+=',
+ );
+ }
+
+ sub delete_needless_alignments {
+ my ( $self, $ibeg, $iend, $ralignment_type_to_go ) = @_;
+
+ # Remove unwanted alignments. This routine is a place to remove
+ # alignments which might cause problems at later stages. There are
+ # currently two types of fixes:
+
+ # 1. Remove excess parens
+ # 2. Remove alignments within 'elsif' conditions
+
+ # Patch #1: Excess alignment of parens can prevent other good
+ # alignments. For example, note the parens in the first two rows of
+ # the following snippet. They would normally get marked for alignment
+ # and aligned as follows:
+
+ # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
+ # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
+ # my $img = new Gimp::Image( $w, $h, RGB );
+
+ # This causes unnecessary paren alignment and prevents the third equals
+ # from aligning. If we remove the unwanted alignments we get:
+
+ # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
+ # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
+ # my $img = new Gimp::Image( $w, $h, RGB );
+
+ # A rule for doing this which works well is to remove alignment of
+ # parens whose containers do not contain other aligning tokens, with
+ # the exception that we always keep alignment of the first opening
+ # paren on a line (for things like 'if' and 'elsif' statements).
+
+ # Setup needed constants
+ my $i_good_paren = -1;
+ my $imin_match = $iend + 1;
+ my $i_elsif_close = $ibeg - 1;
+ my $i_elsif_open = $iend + 1;
+ if ( $iend > $ibeg ) {
+ if ( $types_to_go[$ibeg] eq 'k' ) {
+
+ # Paren patch: mark a location of a paren we should keep, such
+ # as one following something like a leading 'if', 'elsif',..
+ $i_good_paren = $ibeg + 1;
+ if ( $types_to_go[$i_good_paren] eq 'b' ) {
+ $i_good_paren++;
+ }
+
+ # 'elsif' patch: remember the range of the parens of an elsif,
+ # and do not make alignments within them because this can cause
+ # loss of padding and overall brace alignment in the vertical
+ # aligner.
+ if ( $tokens_to_go[$ibeg] eq 'elsif'
+ && $i_good_paren < $iend
+ && $tokens_to_go[$i_good_paren] eq '(' )
+ {
+ $i_elsif_open = $i_good_paren;
+ $i_elsif_close = $self->mate_index_to_go($i_good_paren);
+ }
+ }
+ }
+
+ # Loop to make the fixes on this line
+ my @imatch_list;
+ for my $i ( $ibeg .. $iend ) {
+
+ if ( $ralignment_type_to_go->[$i] ne '' ) {
+
+ # Patch #2: undo alignment within elsif parens
+ if ( $i > $i_elsif_open && $i < $i_elsif_close ) {
+ $ralignment_type_to_go->[$i] = '';
+ next;
+ }
+ push @imatch_list, $i;
+
+ }
+ if ( $tokens_to_go[$i] eq ')' ) {
+
+ # Patch #1: undo the corresponding opening paren if:
+ # - it is at the top of the stack
+ # - and not the first overall opening paren
+ # - does not follow a leading keyword on this line
+ my $imate = $self->mate_index_to_go($i);
+ if ( @imatch_list
+ && $imatch_list[-1] eq $imate
+ && ( $ibeg > 1 || @imatch_list > 1 )
+ && $imate > $i_good_paren )
+ {
+ $ralignment_type_to_go->[$imate] = '';
+ pop @imatch_list;
+ }
+ }
+ }
+ return;
}
sub make_alignment_patterns {
# @patterns - a modified list of token types, one for each alignment
# field. These should normally each match before alignment is
# allowed, even when the alignment tokens match.
- my ( $ibeg, $iend ) = @_;
+ my ( $self, $ibeg, $iend, $ralignment_type_to_go ) = @_;
my @tokens = ();
my @fields = ();
my @patterns = ();
my $j = 0; # field index
$patterns[0] = "";
+ my %token_count;
for my $i ( $ibeg .. $iend ) {
# Keep track of containers balanced on this line only.
if ( $tok =~ /^[\(\{\[]/ ) { #'(' ) {
# if container is balanced on this line...
- my $i_mate = $mate_index_to_go[$i];
+ my $i_mate = $self->mate_index_to_go($i);
if ( $i_mate > $i && $i_mate <= $iend ) {
$depth++;
my $seqno = $type_sequence_to_go[$i];
# matches.
# if we are not aligning on this paren...
- if ( $matching_token_to_go[$i] eq '' ) {
-
- # Sum length from previous alignment, or start of line.
- my $len =
- ( $i_start == $ibeg )
- ? total_line_length( $i_start, $i - 1 )
- : token_sequence_length( $i_start, $i - 1 );
+ if ( $ralignment_type_to_go->[$i] eq '' ) {
+
+ # Sum length from previous alignment
+ my $len = token_sequence_length( $i_start, $i - 1 );
+ if ( $i_start == $ibeg ) {
+
+ # For first token, use distance from start of line
+ # but subtract off the indentation due to level.
+ # Otherwise, results could vary with indentation.
+ $len += leading_spaces_to_go($ibeg) -
+ $levels_to_go[$i_start] * $rOpts_indent_columns;
+ if ( $len < 0 ) { $len = 0 }
+ }
- # tack length onto the container name to make unique
+ # tack this length onto the container name to try
+ # to make a unique token name
$container_name[$depth] .= "-" . $len;
}
}
# if we find a new synchronization token, we are done with
# a field
- if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
+ if ( $i > $i_start && $ralignment_type_to_go->[$i] ne '' ) {
- my $tok = my $raw_tok = $matching_token_to_go[$i];
+ my $tok = my $raw_tok = $ralignment_type_to_go->[$i];
# map similar items
- if ( $tok eq '!~' ) { $tok = '=~' }
+ my $tok_map = $operator_map{$tok};
+ $tok = $tok_map if ($tok_map);
# make separators in different nesting depths unique
# by appending the nesting depth digit.
$tok .= $block_type;
}
+ # Mark multiple copies of certain tokens with the copy number
+ # This will allow the aligner to decide if they are matched.
+ # For now, only do this for equals. For example, the two
+ # equals on the next line will be labeled '=0' and '=0.2'.
+ # Later, the '=0.2' will be ignored in alignment because it
+ # has no match.
+
+ # $| = $debug = 1 if $opt_d;
+ # $full_index = 1 if $opt_i;
+
+ if ( $raw_tok eq '=' || $raw_tok eq '=>' ) {
+ $token_count{$tok}++;
+ if ( $token_count{$tok} > 1 ) {
+ $tok .= '.' . $token_count{$tok};
+ }
+ }
+
# concatenate the text of the consecutive tokens to form
# the field
push( @fields,
# saves indentations of lines of all unmatched opening tokens.
# These will be used by sub get_opening_indentation.
- my ( $ri_first, $ri_last, $rindentation_list ) = @_;
+ my ( $self, $ri_first, $ri_last, $rindentation_list ) = @_;
# we no longer need indentations of any saved indentations which
# are unmatched closing tokens in this batch, because we will
# which matches the token at index $i_opening
# -and its offset (number of columns) from the start of the line
#
- my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
+ my ( $self, $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
# first, see if the opening token is in the current batch
my $i_opening = $mate_index_to_go[$i_closing];
my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
+ if ( !@{$ri_last} ) {
+ warning("Error in opening_indentation: no lines");
+ return;
+ }
+
my $nline = $rindentation_list->[0]; # line number of previous lookup
# reset line location if necessary
# we need to know the last token of this line
my ( $terminal_type, $i_terminal ) =
- terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
+ $self->terminal_type_i( $ibeg, $iend );
my $is_outdented_line = 0;
$opening_indentation, $opening_offset,
$is_leading, $opening_exists
)
- = get_opening_indentation( $ibeg_weld_fix, $ri_first, $ri_last,
- $rindentation_list );
+ = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first,
+ $ri_last, $rindentation_list );
# First set the default behavior:
if (
# undo continuation indentation of a terminal closing token if
# it is the last token before a level decrease. This will allow
# a closing token to line up with its opening counterpart, and
- # avoids a indentation jump larger than 1 level.
+ # avoids an indentation jump larger than 1 level.
if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
&& $i_terminal == $ibeg
&& defined($K_beg) )
{
my $K_next_nonblank = $self->K_next_code($K_beg);
- if ( defined($K_next_nonblank) ) {
+
+ # Patch for RT#131115: honor -bli flag at closing brace
+ my $is_bli =
+ $rOpts_brace_left_and_indent
+ && $block_type_to_go[$i_terminal]
+ && $block_type_to_go[$i_terminal] =~ /$bli_pattern/o;
+
+ if ( !$is_bli && defined($K_next_nonblank) ) {
my $lev = $rLL->[$K_beg]->[_LEVEL_];
my $level_next = $rLL->[$K_next_nonblank]->[_LEVEL_];
$adjust_indentation = 1 if ( $level_next < $lev );
$opening_indentation, $opening_offset,
$is_leading, $opening_exists
)
- = get_opening_indentation( $ibeg, $ri_first, $ri_last,
- $rindentation_list );
+ = $self->get_opening_indentation( $ibeg, $ri_first,
+ $ri_last, $rindentation_list );
my $indentation = $leading_spaces_to_go[$ibeg];
if ( defined($opening_indentation)
&& get_spaces($indentation) >
$opening_indentation, $opening_offset,
$is_leading, $opening_exists
)
- = get_opening_indentation( $ibeg, $ri_first, $ri_last,
+ = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
$rindentation_list );
my $indentation = $leading_spaces_to_go[$ibeg];
if ( defined($opening_indentation)
$opening_indentation, $opening_offset,
$is_leading, $opening_exists
)
- = get_opening_indentation( $ibeg, $ri_first, $ri_last,
+ = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
$rindentation_list );
if ($is_leading) { $adjust_indentation = 2; }
}
# updated per bug report in alex_bug.pl: we must not
# mess with the indentation of closing logical braces so
# we must treat something like '} else {' as if it were
- # an isolated brace my $is_isolated_block_brace = (
- # $iend == $ibeg ) && $block_type_to_go[$ibeg];
+ # an isolated brace
#############################################################
my $is_isolated_block_brace = $block_type_to_go[$ibeg]
- && ( $iend == $ibeg
+ && ( $i_terminal == $ibeg
|| $is_if_elsif_else_unless_while_until_for_foreach{
$block_type_to_go[$ibeg]
} );
}
}
+sub mate_index_to_go {
+ my ( $self, $i ) = @_;
+
+ # Return the matching index of a container or ternary pair
+ # This is equivalent to the array @mate_index_to_go
+ my $K = $K_to_go[$i];
+ my $K_mate = $self->K_mate_index($K);
+ my $i_mate = -1;
+ if ( defined($K_mate) ) {
+ $i_mate = $i + ( $K_mate - $K );
+ if ( $i_mate < 0 || $i_mate > $max_index_to_go ) {
+ $i_mate = -1;
+ }
+ }
+ my $i_mate_alt = $mate_index_to_go[$i];
+
+ # Debug code to eventually be removed
+ if ( 0 && $i_mate_alt != $i_mate ) {
+ my $tok = $tokens_to_go[$i];
+ my $type = $types_to_go[$i];
+ my $tok_mate = '*';
+ my $type_mate = '*';
+ if ( $i_mate >= 0 && $i_mate <= $max_index_to_go ) {
+ $tok_mate = $tokens_to_go[$i_mate];
+ $type_mate = $types_to_go[$i_mate];
+ }
+ my $seq = $type_sequence_to_go[$i];
+ my $file = $logger_object->get_input_stream_name();
+
+ Warn(
+"mate_index: file '$file': i=$i, imate=$i_mate, should be $i_mate_alt, K=$K, K_mate=$K_mate\ntype=$type, tok=$tok, seq=$seq, max=$max_index_to_go, tok_mate=$tok_mate, type_mate=$type_mate"
+ );
+ }
+ return $i_mate;
+}
+
+sub K_mate_index {
+
+ # Given the index K of an opening or closing container, or ?/: ternary pair,
+ # return the index K of the other member of the pair.
+ my ( $self, $K ) = @_;
+ return unless defined($K);
+ my $rLL = $self->{rLL};
+ my $seqno = $rLL->[$K]->[_TYPE_SEQUENCE_];
+ return unless ($seqno);
+
+ my $K_opening = $self->{K_opening_container}->{$seqno};
+ if ( defined($K_opening) ) {
+ if ( $K != $K_opening ) { return $K_opening }
+ return $self->{K_closing_container}->{$seqno};
+ }
+
+ $K_opening = $self->{K_opening_ternary}->{$seqno};
+ if ( defined($K_opening) ) {
+ if ( $K != $K_opening ) { return $K_opening }
+ return $self->{K_closing_ternary}->{$seqno};
+ }
+ return;
+}
+
sub set_vertical_tightness_flags {
- my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
+ my ( $self, $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
# Define vertical tightness controls for the nth line of a batch.
# We create an array of parameters which tell the vertical aligner
my $is_semicolon_terminated;
if ( $n + 1 == $n_last_line ) {
- my ( $terminal_type, $i_terminal ) = terminal_type(
- \@types_to_go, \@block_type_to_go,
- $ibeg_next, $iend_next
- );
+ my ( $terminal_type, $i_terminal ) =
+ $self->terminal_type_i( $ibeg_next, $iend_next );
$is_semicolon_terminated = $terminal_type eq ';'
&& $nesting_depth_to_go[$iend_next] <
$nesting_depth_to_go[$ibeg_next];
{
my %is_vertical_alignment_type;
+ my %is_not_vertical_alignment_token;
my %is_vertical_alignment_keyword;
my %is_terminal_alignment_type;
+ my %is_low_level_alignment_token;
BEGIN {
#;
@is_vertical_alignment_type{@q} = (1) x scalar(@q);
- # only align these at end of line
+ # These 'tokens' are not aligned. We need this to remove [
+ # from the above list because it has type ='{'
+ @q = qw([);
+ @is_not_vertical_alignment_token{@q} = (1) x scalar(@q);
+
+ # these are the only types aligned at a line end
@q = qw(&& ||);
@is_terminal_alignment_type{@q} = (1) x scalar(@q);
+ # these tokens only align at line level
+ @q = ( '{', '(' );
+ @is_low_level_alignment_token{@q} = (1) x scalar(@q);
+
# eq and ne were removed from this list to improve alignment chances
@q = qw(if unless and or err for foreach while until);
@is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
# vertical alignment markers (such as an '=').
#
# Method: We look at each token $i in this output batch and set
- # $matching_token_to_go[$i] equal to those tokens at which we would
+ # $ralignment_type_to_go->[$i] equal to those tokens at which we would
# accept vertical alignment.
- my ( $ri_first, $ri_last ) = @_;
+ my ( $self, $ri_first, $ri_last ) = @_;
+
+ my $ralignment_type_to_go;
+ for my $i ( 0 .. $max_index_to_go ) {
+ $ralignment_type_to_go->[$i] = '';
+ }
# nothing to do if we aren't allowed to change whitespace
if ( !$rOpts_add_whitespace ) {
- for my $i ( 0 .. $max_index_to_go ) {
- $matching_token_to_go[$i] = '';
- }
- return;
+ return $ralignment_type_to_go;
}
# remember the index of last nonblank token before any sidecomment
$vert_last_nonblank_block_type = '';
# look at each token in this output line..
- my $count = 0;
+ my $level_beg = $levels_to_go[$ibeg];
foreach my $i ( $ibeg .. $iend ) {
my $alignment_type = '';
my $type = $types_to_go[$i];
my $block_type = $block_type_to_go[$i];
my $token = $tokens_to_go[$i];
- # check for flag indicating that we should not align
- # this token
- if ( $matching_token_to_go[$i] ) {
- $matching_token_to_go[$i] = '';
+ # do not align tokens at lower level then start of line
+ # except for side comments
+ if ( $levels_to_go[$i] < $levels_to_go[$ibeg]
+ && $types_to_go[$i] ne '#' )
+ {
+ $ralignment_type_to_go->[$i] = '';
next;
}
# align before one of these types..
# Note: add '.' after new vertical aligner is operational
- elsif ( $is_vertical_alignment_type{$type} ) {
+ elsif ( $is_vertical_alignment_type{$type}
+ && !$is_not_vertical_alignment_token{$token} )
+ {
$alignment_type = $token;
# Do not align a terminal token. Although it might
# $code =
# ( $case_matters ? $accessor : " lc($accessor) " )
# . ( $yesno ? " eq " : " ne " )
+
+ # Also, do not align a ( following a leading ? so we can
+ # align something like this:
+ # $converter{$_}->{ushortok} =
+ # $PDL::IO::Pic::biggrays
+ # ? ( m/GIF/ ? 0 : 1 )
+ # : ( m/GIF|RAST|IFF/ ? 0 : 1 );
if ( $i == $ibeg + 2
- && $types_to_go[$ibeg] =~ /^[\.\:]$/
+ && $types_to_go[$ibeg] =~ /^[\.\:\?]$/
&& $types_to_go[ $i - 1 ] eq 'b' )
{
$alignment_type = "";
}
+ # Certain tokens only align at the same level as the
+ # initial line level
+ if ( $is_low_level_alignment_token{$token}
+ && $levels_to_go[$i] != $level_beg )
+ {
+ $alignment_type = "";
+ }
+
# For a paren after keyword, only align something like this:
# if ( $a ) { &a }
# elsif ( $b ) { &b }
- if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
- $alignment_type = ""
- unless $vert_last_nonblank_token =~
- /^(if|unless|elsif)$/;
+ if ( $token eq '(' ) {
+
+ if ( $vert_last_nonblank_type eq 'k' ) {
+ $alignment_type = ""
+ unless $vert_last_nonblank_token =~
+ /^(if|unless|elsif)$/;
+ }
}
# be sure the alignment tokens are unique
$alignment_type = $vert_last_nonblank_type;
}
- #--------------------------------------------------------
- # patch for =~ operator. We only align this if it
- # is the first operator in a line, and the line is a simple
- # statement. Aligning them within a statement
- # interferes could interfere with other good alignments.
- #--------------------------------------------------------
- if ( $alignment_type eq '=~' ) {
- my $terminal_type = $types_to_go[$i_terminal];
- if ( $count > 0 || $max_line > 0 || $terminal_type ne ';' )
- {
- $alignment_type = "";
- }
- }
-
#--------------------------------------------------------
# then store the value
#--------------------------------------------------------
- $matching_token_to_go[$i] = $alignment_type;
- $count++ if ($alignment_type);
+ $ralignment_type_to_go->[$i] = $alignment_type;
if ( $type ne 'b' ) {
$vert_last_nonblank_type = $type;
$vert_last_nonblank_token = $token;
}
}
}
- return;
+ return $ralignment_type_to_go;
}
}
-sub terminal_type {
+sub terminal_type_i {
# returns type of last token on this line (terminal token), as follows:
# returns # for a full-line comment
# returns ' ' for a blank line
# otherwise returns final token type
- my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
+ my ( $self, $ibeg, $iend ) = @_;
- # check for full-line comment..
- if ( $rtype->[$ibeg] eq '#' ) {
- return wantarray ? ( $rtype->[$ibeg], $ibeg ) : $rtype->[$ibeg];
+ # Start at the end and work backwards
+ my $i = $iend;
+ my $type_i = $types_to_go[$i];
+
+ # Check for side comment
+ if ( $type_i eq '#' ) {
+ $i--;
+ if ( $i < $ibeg ) {
+ return wantarray ? ( $type_i, $ibeg ) : $type_i;
+ }
+ $type_i = $types_to_go[$i];
}
- else {
- # start at end and walk backwards..
- for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
+ # Skip past a blank
+ if ( $type_i eq 'b' ) {
+ $i--;
+ if ( $i < $ibeg ) {
+ return wantarray ? ( $type_i, $ibeg ) : $type_i;
+ }
+ $type_i = $types_to_go[$i];
+ }
+
+ # Found it..make sure it is a BLOCK termination,
+ # but hide a terminal } after sort/grep/map because it is not
+ # necessarily the end of the line. (terminal.t)
+ my $block_type = $block_type_to_go[$i];
+ if (
+ $type_i eq '}'
+ && ( !$block_type
+ || ( $is_sort_map_grep_eval_do{$block_type} ) )
+ )
+ {
+ $type_i = 'b';
+ }
+ return wantarray ? ( $type_i, $i ) : $type_i;
+}
+
+sub terminal_type_K {
- # skip past any side comment and blanks
- next if ( $rtype->[$i] eq 'b' );
- next if ( $rtype->[$i] eq '#' );
+ # returns type of last token on this line (terminal token), as follows:
+ # returns # for a full-line comment
+ # returns ' ' for a blank line
+ # otherwise returns final token type
- # found it..make sure it is a BLOCK termination,
- # but hide a terminal } after sort/grep/map because it is not
- # necessarily the end of the line. (terminal.t)
- my $terminal_type = $rtype->[$i];
- if (
- $terminal_type eq '}'
- && ( !$rblock_type->[$i]
- || ( $is_sort_map_grep_eval_do{ $rblock_type->[$i] } ) )
- )
- {
- $terminal_type = 'b';
- }
- return wantarray ? ( $terminal_type, $i ) : $terminal_type;
+ my ( $self, $Kbeg, $Kend ) = @_;
+ my $rLL = $self->{rLL};
+
+ if ( !defined($Kend) ) {
+ Fault("Error in terminal_type_K: Kbeg=$Kbeg > $Kend=Kend");
+ }
+
+ # Start at the end and work backwards
+ my $K = $Kend;
+ my $type_K = $rLL->[$K]->[_TYPE_];
+
+ # Check for side comment
+ if ( $type_K eq '#' ) {
+ $K--;
+ if ( $K < $Kbeg ) {
+ return wantarray ? ( $type_K, $Kbeg ) : $type_K;
}
+ $type_K = $rLL->[$K]->[_TYPE_];
+ }
- # empty line
- return wantarray ? ( ' ', $ibeg ) : ' ';
+ # Skip past a blank
+ if ( $type_K eq 'b' ) {
+ $K--;
+ if ( $K < $Kbeg ) {
+ return wantarray ? ( $type_K, $Kbeg ) : $type_K;
+ }
+ $type_K = $rLL->[$K]->[_TYPE_];
}
+
+ # found it..make sure it is a BLOCK termination,
+ # but hide a terminal } after sort/grep/map because it is not
+ # necessarily the end of the line. (terminal.t)
+ my $block_type = $rLL->[$K]->[_BLOCK_TYPE_];
+ if (
+ $type_K eq '}'
+ && ( !$block_type
+ || ( $is_sort_map_grep_eval_do{$block_type} ) )
+ )
+ {
+ $type_K = 'b';
+ }
+ return wantarray ? ( $type_K, $K ) : $type_K;
+
}
{ # set_bond_strengths
my $i_break = $rcomma_index->[0];
set_forced_breakpoint($i_break);
${$rdo_not_break_apart} = 1;
- set_non_alignment_flags( $comma_count, $rcomma_index );
return;
}
${$rdo_not_break_apart} = 1;
}
}
- set_non_alignment_flags( $comma_count, $rcomma_index );
return;
}
${$rdo_not_break_apart} = 1;
}
}
- set_non_alignment_flags( $comma_count, $rcomma_index );
}
return;
}
}
}
-sub set_non_alignment_flags {
-
- # set flag which indicates that these commas should not be
- # aligned
- my ( $comma_count, $rcomma_index ) = @_;
- foreach ( 0 .. $comma_count - 1 ) {
- $matching_token_to_go[ $rcomma_index->[$_] ] = 1;
- }
- return;
-}
-
sub study_list_complexity {
# Look for complex tables which should be formatted with one term per line.
return;
}
+sub sync_token_K {
+ my ( $self, $i ) = @_;
+
+ # Keep tokens in the rLL array in sync with the _to_go array
+ my $rLL = $self->{rLL};
+ my $K = $K_to_go[$i];
+ if ( defined($K) ) {
+ $rLL->[$K]->[_TOKEN_] = $tokens_to_go[$i];
+ }
+ else {
+ # shouldn't happen
+ }
+ return;
+}
+
{ # begin recombine_breakpoints
my %is_amp_amp;
if ( $types_to_go[$i] eq ';' && $tokens_to_go[$i] eq '' ) {
$tokens_to_go[$i] = $want_left_space{';'} == WS_NO ? ';' : ' ;';
+ $self->sync_token_K($i);
my $line_number = 1 + $self->get_old_line_index( $K_to_go[$i] );
note_added_semicolon($line_number);
unless (
$this_line_is_semicolon_terminated
&& (
+ $type_ibeg_1 eq '}'
+ || (
- # following 'if' or 'unless' or 'or'
- $type_ibeg_1 eq 'k'
- && $is_if_unless{ $tokens_to_go[$ibeg_1] }
-
- # important: only combine a very simple or
- # statement because the step below may have
- # combined a trailing 'and' with this or,
- # and we do not want to then combine
- # everything together
- && ( $iend_2 - $ibeg_2 <= 7 )
+ # following 'if' or 'unless' or 'or'
+ $type_ibeg_1 eq 'k'
+ && $is_if_unless{ $tokens_to_go[$ibeg_1] }
+
+ # important: only combine a very simple or
+ # statement because the step below may have
+ # combined a trailing 'and' with this or,
+ # and we do not want to then combine
+ # everything together
+ && ( $iend_2 - $ibeg_2 <= 7 )
+ )
)
);
# statement. If we see a break at any one, break at all similar tokens
# within the same container.
#
- my ( $ri_left, $ri_right ) = @_;
+ my ( $self, $ri_left, $ri_right ) = @_;
my %saw_chain_type;
my %left_chain_type;
if ( $left_chain_type{$type} ) {
next if $nobreak_to_go[ $itest - 1 ];
foreach my $i ( @{ $left_chain_type{$type} } ) {
- next unless in_same_container( $i, $itest );
+ next unless $self->in_same_container_i( $i, $itest );
push @insert_list, $itest - 1;
# Break at matching ? if this : is at a different level.
if ( $right_chain_type{$type} ) {
next if $nobreak_to_go[$itest];
foreach my $i ( @{ $right_chain_type{$type} } ) {
- next unless in_same_container( $i, $itest );
+ next unless $self->in_same_container_i( $i, $itest );
push @insert_list, $itest;
# break at matching ? if this : is at a different level
sub insert_final_breaks {
- my ( $ri_left, $ri_right ) = @_;
+ my ( $self, $ri_left, $ri_right ) = @_;
my $nmax = @{$ri_right} - 1;
}
# For long ternary chains,
- # if the first : we see has its # ? is in the interior
+ # if the first : we see has its ? is in the interior
# of a preceding line, then see if there are any good
# breakpoints before the ?.
if ( $i_first_colon > 0 ) {
$type eq ','
|| $type eq 'k' && ( $nmax > 1 && $token eq 'return' )
)
- && in_same_container( $ii, $i_question )
+ && $self->in_same_container_i( $ii, $i_question )
)
{
push @insert_list, $ii;
last;
}
-
-## # For now, a good break is either a comma or a 'return'.
-## if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' )
-## && in_same_container( $ii, $i_question ) )
-## {
-## push @insert_list, $ii;
-## last;
-## }
}
# insert any new break points
return;
}
-sub in_same_container {
+sub in_same_container_i {
# check to see if tokens at i1 and i2 are in the
# same container, and not separated by a comma, ? or :
- # FIXME: this can be written more efficiently now
- my ( $i1, $i2 ) = @_;
- my $type = $types_to_go[$i1];
- my $depth = $nesting_depth_to_go[$i1];
- return unless ( $nesting_depth_to_go[$i2] == $depth );
- if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
-
- ###########################################################
- # This is potentially a very slow routine and not critical.
- # For safety just give up for large differences.
- # See test file 'infinite_loop.txt'
- # TODO: replace this loop with a data structure
- ###########################################################
- return if ( $i2 - $i1 > 200 );
-
- foreach my $i ( $i1 + 1 .. $i2 - 1 ) {
- next if ( $nesting_depth_to_go[$i] > $depth );
- return if ( $nesting_depth_to_go[$i] < $depth );
-
- my $tok = $tokens_to_go[$i];
- $tok = ',' if $tok eq '=>'; # treat => same as ,
+ # This is an interface between the _to_go arrays to the rLL array
+ my ( $self, $i1, $i2 ) = @_;
+ return $self->in_same_container_K( $K_to_go[$i1], $K_to_go[$i2] );
+}
+{ # sub in_same_container_K
+ my $ris_break_token;
+ my $ris_comma_token;
+
+ BEGIN {
+
+ # all cases break on seeing commas at same level
+ my @q = qw( => );
+ push @q, ',';
+ @{$ris_comma_token}{@q} = (1) x scalar(@q);
+
+ # Non-ternary text also breaks on seeing any of qw(? : || or )
# Example: we would not want to break at any of these .'s
# : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
- if ( $type ne ':' ) {
- return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or';
+ push @q, qw( or || ? : );
+ @{$ris_break_token}{@q} = (1) x scalar(@q);
+ }
+
+ sub in_same_container_K {
+
+ # Check to see if tokens at K1 and K2 are in the same container,
+ # and not separated by certain characters: => , ? : || or
+ # This version uses the newer $rLL data structure
+
+ my ( $self, $K1, $K2 ) = @_;
+ if ( $K2 < $K1 ) { ( $K1, $K2 ) = ( $K2, $K1 ) }
+ my $rLL = $self->{rLL};
+ my $depth_1 = $rLL->[$K1]->[_SLEVEL_];
+ return if ( $depth_1 < 0 );
+ return unless ( $rLL->[$K2]->[_SLEVEL_] == $depth_1 );
+
+ # Select character set to scan for
+ my $type_1 = $rLL->[$K1]->[_TYPE_];
+ my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token;
+
+ # Fast preliminary loop to verify that tokens are in the same container
+ my $KK = $K1;
+ while (1) {
+ $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
+ last if !defined($KK);
+ last if ( $KK >= $K2 );
+ my $depth_K = $rLL->[$KK]->[_SLEVEL_];
+ return if ( $depth_K < $depth_1 );
+ next if ( $depth_K > $depth_1 );
+ if ( $type_1 ne ':' ) {
+ my $tok_K = $rLL->[$KK]->[_TOKEN_];
+ return if ( $tok_K eq '?' || $tok_K eq ':' );
+ }
}
- else {
- return if ( $tok =~ /^[\,]$/ );
+
+ # Slow loop checking for certain characters
+
+ ###########################################################
+ # This is potentially a slow routine and not critical.
+ # For safety just give up for large differences.
+ # See test file 'infinite_loop.txt'
+ ###########################################################
+ return if ( $K2 - $K1 > 200 );
+
+ foreach my $K ( $K1 + 1 .. $K2 - 1 ) {
+
+ my $depth_K = $rLL->[$K]->[_SLEVEL_];
+ next if ( $depth_K > $depth_1 );
+ return if ( $depth_K < $depth_1 ); # redundant, checked above
+ my $tok = $rLL->[$K]->[_TOKEN_];
+ return if ( $rbreak->{$tok} );
}
+ return 1;
}
- return 1;
}
sub set_continuation_breaks {
# may be updated to be =1 for any index $i after which there must be
# a break. This signals later routines not to undo the breakpoint.
- my $saw_good_break = shift;
- my @i_first = (); # the first index to output
- my @i_last = (); # the last index to output
- my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
+ my ( $self, $saw_good_break ) = @_;
+ my @i_first = (); # the first index to output
+ my @i_last = (); # the last index to output
+ my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
set_bond_strengths();
my $imax = $max_index_to_go;
if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
- my $i_begin = $imin; # index for starting next iteration
+ my $i_begin = $imin; # index for starting next iteration
my $leading_spaces = leading_spaces_to_go($imin);
my $line_count = 0;
# RT #104427: Dont break before opening sub brace because
# sub block breaks handled at higher level, unless
- # it looks like the preceeding list is long and broken
+ # it looks like the preceding list is long and broken
&& !(
$next_nonblank_block_type =~ /^sub\b/
&& ( $nesting_depth_to_go[$i_begin] ==
# do not break if statement is broken by side comment
next
- if (
- $tokens_to_go[$max_index_to_go] eq '#'
- && terminal_type( \@types_to_go, \@block_type_to_go, 0,
- $max_index_to_go ) !~ /^[\;\}]$/
- );
+ if ( $tokens_to_go[$max_index_to_go] eq '#'
+ && $self->terminal_type_i( 0, $max_index_to_go ) !~
+ /^[\;\}]$/ );
# no break needed if matching : is also on the line
next
return;
}
1;
-