X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=lib%2FPerl%2FTidy%2FFormatter.pm;h=164ca4592ac85f65817c5d6431b66bf4b2f3f96c;hb=7f27e55dce5925b2bbe8fcfca64f385e917a52be;hp=9766e25ed4ef04250c4df5aac289330e0a108821;hpb=657098da8da16dccd551721ffc180956d8aab7fc;p=perltidy.git diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm index 9766e25..164ca45 100644 --- a/lib/Perl/Tidy/Formatter.pm +++ b/lib/Perl/Tidy/Formatter.pm @@ -12,7 +12,7 @@ package Perl::Tidy::Formatter; use strict; use warnings; use Carp; -our $VERSION = '20190601'; +our $VERSION = '20200110'; # The Tokenizer will be loaded with the Formatter ##use Perl::Tidy::Tokenizer; # for is_keyword() @@ -112,7 +112,6 @@ use vars qw{ @levels_to_go @leading_spaces_to_go @reduced_spaces_to_go - @matching_token_to_go @mate_index_to_go @ci_levels_to_go @nesting_depth_to_go @@ -204,6 +203,7 @@ use vars qw{ %is_anon_sub_1_brace_follower %is_sort_map_grep %is_sort_map_grep_eval + %want_one_line_block %is_sort_map_grep_eval_do %is_block_without_semicolon %is_if_unless @@ -644,12 +644,12 @@ sub new { $gnu_position_predictor = 0; # where the current token is predicted to be $max_gnu_stack_index = 0; $max_gnu_item_index = -1; - $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 ); - @gnu_item_list = (); - $last_output_indentation = 0; - $last_indentation_written = 0; - $last_unadjusted_indentation = 0; - $last_leading_token = ""; + $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 ); + @gnu_item_list = (); + $last_output_indentation = 0; + $last_indentation_written = 0; + $last_unadjusted_indentation = 0; + $last_leading_token = ""; $last_output_short_opening_token = 0; $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'}; @@ -663,7 +663,6 @@ sub new { @summed_lengths_to_go = (); # line length to start of ith token @token_lengths_to_go = (); @levels_to_go = (); - @matching_token_to_go = (); @mate_index_to_go = (); @ci_levels_to_go = (); @nesting_depth_to_go = (0); @@ -761,10 +760,12 @@ sub new { K_closing_container => {}, # for quickly traversing structure K_opening_ternary => {}, # for quickly traversing structure K_closing_ternary => {}, # for quickly traversing structure + rcontainer_map => {}, # hierarchical map of containers rK_phantom_semicolons => undef, # for undoing phantom semicolons if iterating rpaired_to_inner_container => {}, rbreak_container => {}, # prevent one-line blocks + rshort_nested => {}, # blocks not forced open rvalid_self_keys => [], # for checking valign_batch_count => 0, }; @@ -835,10 +836,12 @@ sub Fault { my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0); my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1); my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2); + my $input_stream_name = $logger_object->get_input_stream_name(); Die(<flush(); + next; + } + # Handle all other lines of code $self->print_line_of_tokens($line_of_tokens); } @@ -1585,7 +1594,7 @@ sub break_lines { # out of __END__ and __DATA__ sections, because # the user may be using this section for any purpose whatsoever if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; } - if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; } + if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; } if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// } if ( !$skip_line && !$in_format_skipping_section @@ -1720,6 +1729,12 @@ sub write_line { if ( $jmax >= 0 ) { $Kfirst = defined($Klimit) ? $Klimit + 1 : 0; foreach my $j ( 0 .. $jmax ) { + + # Clip negative nesting depths to zero to avoid problems. + # Negative values can occur in files with unbalanced containers + my $slevel = $rslevels->[$j]; + if ( $slevel < 0 ) { $slevel = 0 } + my @tokary; @tokary[ _TOKEN_, _TYPE_, @@ -1734,7 +1749,7 @@ sub write_line { $rblock_type->[$j], $rcontainer_type->[$j], $rcontainer_environment->[$j], $rtype_sequence->[$j], $rlevels->[$j], $rlevels->[$j], - $rslevels->[$j], $rci_levels->[$j], + $slevel, $rci_levels->[$j], $input_line_no, ); push @{$rLL}, \@tokary; @@ -1848,7 +1863,7 @@ sub initialize_whitespace_hashes { $binary_ws_rules{'t'}{'L'} = WS_NO; $binary_ws_rules{'t'}{'{'} = WS_NO; $binary_ws_rules{'}'}{'L'} = WS_NO; - $binary_ws_rules{'}'}{'{'} = WS_NO; + $binary_ws_rules{'}'}{'{'} = WS_OPTIONAL; # RT#129850; was WS_NO $binary_ws_rules{'$'}{'L'} = WS_NO; $binary_ws_rules{'$'}{'{'} = WS_NO; $binary_ws_rules{'@'}{'L'} = WS_NO; @@ -2380,8 +2395,10 @@ sub respace_tokens { # A sub to store one token in the new array # All new tokens must be stored by this sub so that it can update # all data structures on the fly. - my $last_nonblank_type = ';'; - my $store_token = sub { + my $last_nonblank_type = ';'; + my $last_nonblank_token = ';'; + my $last_nonblank_block_type = ''; + my $store_token = sub { my ($item) = @_; # This will be the index of this item in the new array @@ -2428,7 +2445,17 @@ sub respace_tokens { $item->[_CUMULATIVE_LENGTH_] = $cumulative_length; my $type = $item->[_TYPE_]; - if ( $type ne 'b' ) { $last_nonblank_type = $type } + + # trim side comments + if ( $type eq '#' ) { + $item->[_TOKEN_] =~ s/\s*$//; + } + + if ( $type && $type ne 'b' && $type ne '#' ) { + $last_nonblank_type = $type; + $last_nonblank_token = $item->[_TOKEN_]; + $last_nonblank_block_type = $item->[_BLOCK_TYPE_]; + } # and finally, add this item to the new array push @{$rLL_new}, $item; @@ -2758,7 +2785,7 @@ sub respace_tokens { # or we are deleting all whitespace # Note that whitespace flag is a flag indicating whether a # white space BEFORE the token is needed - next if ( $KK >= $Kmax ); # skip terminal blank + next if ( $KK >= $Klast ); # skip terminal blank my $Knext = $KK + 1; my $ws = $rwhitespace_flags->[$Knext]; if ( $ws == -1 @@ -2936,6 +2963,17 @@ sub respace_tokens { } if ( $token =~ /$SUB_PATTERN/ ) { + + # -spp = 0 : no space before opening prototype paren + # -spp = 1 : stable (follow input spacing) + # -spp = 2 : always space before opening prototype paren + my $spp = $rOpts->{'space-prototype-paren'}; + if ( defined($spp) ) { + if ( $spp == 0 ) { $token =~ s/\s+\(/\(/; } + elsif ( $spp == 2 ) { $token =~ s/\(/ (/; } + } + + # one space max, and no tabs $token =~ s/\s+/ /g; $rtoken_vars->[_TOKEN_] = $token; } @@ -2971,10 +3009,63 @@ sub respace_tokens { # check a quote for problems elsif ( $type eq 'Q' ) { + $check_Q->( $KK, $Kfirst ); + } + + # handle semicolons + elsif ( $type eq ';' ) { + + # Remove unnecessary semicolons, but not after bare + # blocks, where it could be unsafe if the brace is + # mistokenized. + if ( + $rOpts->{'delete-semicolons'} + && ( + ( + $last_nonblank_type eq '}' + && ( + $is_block_without_semicolon{ + $last_nonblank_block_type} + || $last_nonblank_block_type =~ /$SUB_PATTERN/ + || $last_nonblank_block_type =~ /^\w+:$/ ) + ) + || $last_nonblank_type eq ';' + ) + ) + { + + # This looks like a deletable semicolon, but even if a + # semicolon can be deleted it is necessarily best to do so. + # We apply these additional rules for deletion: + # - Always ok to delete a ';' at the end of a line + # - Never delete a ';' before a '#' because it would + # promote it to a block comment. + # - If a semicolon is not at the end of line, then only + # delete if it is followed by another semicolon or closing + # token. This includes the comment rule. It may take + # two passes to get to a final state, but it is a little + # safer. For example, keep the first semicolon here: + # eval { sub bubba { ok(0) }; ok(0) } || ok(1); + # It is not required but adds some clarity. + my $ok_to_delete = 1; + if ( $KK < $Klast ) { + my $Kn = $self->K_next_nonblank($KK); + if ( defined($Kn) && $Kn <= $Klast ) { + my $next_nonblank_token_type = + $rLL->[$Kn]->[_TYPE_]; + $ok_to_delete = $next_nonblank_token_type eq ';' + || $next_nonblank_token_type eq '}'; + } + } - # This is ready to go but is commented out because there is - # still identical logic in sub break_lines. - # $check_Q->($KK, $Kfirst); + if ($ok_to_delete) { + note_deleted_semicolon(); + next; + } + else { + write_logfile_entry("Extra ';'\n"); + } + } } elsif ($type_sequence) { @@ -3222,23 +3313,6 @@ sub respace_tokens { } } -=pod - # NOTE: This does not work yet. Version in print-line-of-tokens - # is Still used until fixed - - # compare input/output indentation except for continuation lines - # (because they have an unknown amount of initial blank space) - # and lines which are quotes (because they may have been outdented) - # Note: this test is placed here because we know the continuation flag - # at this point, which allows us to avoid non-meaningful checks. - my $structural_indentation_level = $rLL->[$Kfirst]->[_LEVEL_]; - compare_indentation_levels( $guessed_indentation_level, - $structural_indentation_level ) - unless ( $rLL->[$Kfirst]->[_CI_LEVEL_] > 0 - || $guessed_indentation_level == 0 - && $rLL->[$Kfirst]->[_TYPE_] eq 'Q' ); -=cut - # Patch needed for MakeMaker. Do not break a statement # in which $VERSION may be calculated. See MakeMaker.pm; # this is based on the coding in it. @@ -3537,6 +3611,195 @@ sub K_previous_nonblank { return; } +sub map_containers { + + # Maps the container hierarchy + my $self = shift; + my $rLL = $self->{rLL}; + return unless ( defined($rLL) && @{$rLL} ); + + my $K_opening_container = $self->{K_opening_container}; + my $K_closing_container = $self->{K_closing_container}; + my $rcontainer_map = $self->{rcontainer_map}; + + # loop over containers + my @stack; # stack of container sequence numbers + my $KNEXT = 0; + while ( defined($KNEXT) ) { + my $KK = $KNEXT; + $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_]; + my $rtoken_vars = $rLL->[$KK]; + my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; + if ( !$type_sequence ) { + next if ( $KK == 0 ); # first token in file may not be container + Fault("sequence = $type_sequence not defined at K=$KK"); + } + + my $token = $rtoken_vars->[_TOKEN_]; + if ( $is_opening_token{$token} ) { + if (@stack) { + $rcontainer_map->{$type_sequence} = $stack[-1]; + } + push @stack, $type_sequence; + } + if ( $is_closing_token{$token} ) { + if (@stack) { + my $seqno = pop @stack; + if ( $seqno != $type_sequence ) { + + # shouldn't happen unless file is garbage + } + } + } + } + + # the stack should be empty for a good file + if (@stack) { + + # unbalanced containers; file probably bad + } + else { + # ok + } + return; +} + +sub mark_short_nested_blocks { + + # This routine looks at the entire file and marks any short nested blocks + # which should not be broken. The results are stored in the hash + # $rshort_nested->{$type_sequence} + # which will be true if the container should remain intact. + # + # For example, consider the following line: + + # sub cxt_two { sort { $a <=> $b } test_if_list() } + + # The 'sort' block is short and nested within an outer sub block. + # Normally, the existance of the 'sort' block will force the sub block to + # break open, but this is not always desirable. Here we will set a flag for + # the sort block to prevent this. To give the user control, we will + # follow the input file formatting. If either of the blocks is broken in + # the input file then we will allow it to remain broken. Otherwise we will + # set a flag to keep it together in later formatting steps. + + # The flag which is set here will be checked in two places: + # 'sub print_line_of_tokens' and 'sub starting_one_line_block' + + my $self = shift; + my $rLL = $self->{rLL}; + return unless ( defined($rLL) && @{$rLL} ); + + return unless ( $rOpts->{'one-line-block-nesting'} ); + + my $K_opening_container = $self->{K_opening_container}; + my $K_closing_container = $self->{K_closing_container}; + my $rbreak_container = $self->{rbreak_container}; + my $rshort_nested = $self->{rshort_nested}; + my $rcontainer_map = $self->{rcontainer_map}; + my $rlines = $self->{rlines}; + + # Variables needed for estimating line lengths + my $starting_indent; + my $starting_lentot; + my $length_tol = 1; + + my $excess_length_to_K = sub { + my ($K) = @_; + + # Estimate the length from the line start to a given token + my $length = $self->cumulative_length_before_K($K) - $starting_lentot; + my $excess_length = + $starting_indent + $length + $length_tol - $rOpts_maximum_line_length; + return ($excess_length); + }; + + my $is_broken_block = sub { + + # a block is broken if the input line numbers of the braces differ + my ($seqno) = @_; + my $K_opening = $K_opening_container->{$seqno}; + return unless ( defined($K_opening) ); + my $K_closing = $K_closing_container->{$seqno}; + return unless ( defined($K_closing) ); + return $rbreak_container->{$seqno} + || $rLL->[$K_closing]->[_LINE_INDEX_] != + $rLL->[$K_opening]->[_LINE_INDEX_]; + }; + + # loop over all containers + my @open_block_stack; + my $iline = -1; + my $KNEXT = 0; + while ( defined($KNEXT) ) { + my $KK = $KNEXT; + $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_]; + my $rtoken_vars = $rLL->[$KK]; + my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; + if ( !$type_sequence ) { + next if ( $KK == 0 ); # first token in file may not be container + + # an error here is most likely due to a recent programming change + Fault("sequence = $type_sequence not defined at K=$KK"); + } + + # We are just looking at code blocks + my $token = $rtoken_vars->[_TOKEN_]; + my $type = $rtoken_vars->[_TYPE_]; + next unless ( $type eq $token ); + my $block_type = $rtoken_vars->[_BLOCK_TYPE_]; + next unless ($block_type); + + # Keep a stack of all acceptable block braces seen. + # Only consider blocks entirely on one line so dump the stack when line + # changes. + my $iline_last = $iline; + $iline = $rLL->[$KK]->[_LINE_INDEX_]; + if ( $iline != $iline_last ) { @open_block_stack = () } + + if ( $token eq '}' ) { + if (@open_block_stack) { pop @open_block_stack } + } + next unless ( $token eq '{' ); + + # block must be balanced (bad scripts may be unbalanced) + my $K_opening = $K_opening_container->{$type_sequence}; + my $K_closing = $K_closing_container->{$type_sequence}; + next unless ( defined($K_opening) && defined($K_closing) ); + + # require that this block be entirely on one line + next if ( $is_broken_block->($type_sequence) ); + + # See if this block fits on one line of allowed length (which may + # be different from the input script) + $starting_lentot = + $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; + $starting_indent = 0; + if ( !$rOpts_variable_maximum_line_length ) { + my $level = $rLL->[$KK]->[_LEVEL_]; + $starting_indent = $rOpts_indent_columns * $level; + } + + # Dump the stack if block is too long and skip this block + if ( $excess_length_to_K->($K_closing) > 0 ) { + @open_block_stack = (); + next; + } + + # OK, Block passes tests, remember it + push @open_block_stack, $type_sequence; + + # We are only marking nested code blocks, + # so check for a previous block on the stack + next unless ( @open_block_stack > 1 ); + + # Looks OK, mark this as a short nested block + $rshort_nested->{$type_sequence} = 1; + + } + return; +} + sub weld_containers { # do any welding operations @@ -3643,12 +3906,15 @@ sub weld_cuddled_blocks { # loop over structure items to find cuddled pairs my $level = 0; - my $KK = 0; - while ( defined( $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] ) ) { + my $KNEXT = 0; + while ( defined($KNEXT) ) { + my $KK = $KNEXT; + $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_]; my $rtoken_vars = $rLL->[$KK]; my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; if ( !$type_sequence ) { - Fault("sequence = $type_sequence not defined"); + next if ( $KK == 0 ); # first token in file may not be container + Fault("sequence = $type_sequence not defined at K=$KK"); } # We use the original levels because they get changed by sub @@ -3936,6 +4202,36 @@ sub weld_nested_containers { # Do not weld if this makes our line too long $do_not_weld ||= $excess_length_to_K->($Kinner_opening) > 0; + # DO-NOT-WELD RULE 4; implemented for git#10: + # Do not weld an opening -ce brace if the next container is on a single + # line, different from the opening brace. (This is very rare). For + # example, given the following with -ce, we will avoid joining the { + # and [ + + # } else { + # [ $_, length($_) ] + # } + + # because this would produce a terminal one-line block: + + # } else { [ $_, length($_) ] } + + # which may not be what is desired. But given this input: + + # } else { [ $_, length($_) ] } + + # then we will do the weld and retain the one-line block + if ( $rOpts->{'cuddled-else'} ) { + my $block_type = $rLL->[$Kouter_opening]->[_BLOCK_TYPE_]; + if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) { + my $io_line = $inner_opening->[_LINE_INDEX_]; + my $ic_line = $inner_closing->[_LINE_INDEX_]; + my $oo_line = $outer_opening->[_LINE_INDEX_]; + $do_not_weld ||= + ( $oo_line < $io_line && $ic_line == $io_line ); + } + } + if ($do_not_weld) { # After neglecting a pair, we start measuring from start of point io @@ -4087,12 +4383,15 @@ sub weld_nested_quotes { }; # look for single qw quotes nested in containers - my $KK = 0; - while ( defined( $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] ) ) { + my $KNEXT = 0; + while ( defined($KNEXT) ) { + my $KK = $KNEXT; + $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_]; my $rtoken_vars = $rLL->[$KK]; my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_]; if ( !$outer_seqno ) { - Fault("sequence = $outer_seqno not defined"); + next if ( $KK == 0 ); # first token in file may not be container + Fault("sequence = $outer_seqno not defined at K=$KK"); } my $token = $rtoken_vars->[_TOKEN_]; @@ -4381,6 +4680,15 @@ sub resync_lines_and_tokens { $line_of_tokens->{_line_text} =~ s/\s+$//; } $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ]; + + # Deleting semicolons can create new empty code lines + # which should be marked as blank + if ( !defined($Kfirst) ) { + my $code_type = $line_of_tokens->{_code_type}; + if ( !$code_type ) { + $line_of_tokens->{_code_type} = 'BL'; + } + } } } @@ -4452,9 +4760,15 @@ sub finish_formatting { # remains fixed for the rest of this iteration. $self->respace_tokens(); + # Make a hierarchical map of the containers + $self->map_containers(); + # Implement any welding needed for the -wn or -cb options $self->weld_containers(); + # Locate small nested blocks which should not be broken + $self->mark_short_nested_blocks(); + # Finishes formatting and write the result to the line sink. # Eventually this call should just change the 'rlines' data according to the # new line breaks and then return so that we can do an internal iteration @@ -5190,7 +5504,7 @@ sub token_sequence_length { # return length of tokens ($ibeg .. $iend) including $ibeg & $iend # returns 0 if $ibeg > $iend (shouldn't happen) my ( $ibeg, $iend ) = @_; - return 0 if ( $iend < 0 || $ibeg > $iend ); + return 0 if ( $iend < 0 || $ibeg > $iend ); return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 ); return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg]; } @@ -5277,7 +5591,7 @@ sub wrapup { write_logfile_entry( " Last at input line $last_deleted_semicolon_at\n"); } - write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n"); + write_logfile_entry(" (Use -ndsm to prevent semicolon deletion)\n"); write_logfile_entry("\n"); } @@ -5379,11 +5693,16 @@ sub check_options { } } + make_sub_matching_pattern(); make_bli_pattern(); make_block_brace_vertical_tightness_pattern(); make_blank_line_pattern(); make_keyword_group_list_pattern(); + # Make initial list of desired one line block types + # They will be modified by 'prepare_cuddled_block_types' + %want_one_line_block = %is_sort_map_grep_eval; + prepare_cuddled_block_types(); if ( $rOpts->{'dump-cuddled-block-list'} ) { dump_cuddled_block_list(*STDOUT); @@ -5668,6 +5987,25 @@ EOM '?' => ':', ); + if ( $rOpts->{'ignore-old-breakpoints'} ) { + if ( $rOpts->{'break-at-old-method-breakpoints'} ) { + Warn("Conflicting parameters: -iob and -bom; -bom will be ignored\n" + ); + } + if ( $rOpts->{'break-at-old-comma-breakpoints'} ) { + Warn("Conflicting parameters: -iob and -boc; -boc will be ignored\n" + ); + } + + # Note: there are additional parameters that can be made inactive by + # -iob, but they are on by default so we would generate excessive + # warnings if we noted them. They are: + # $rOpts->{'break-at-old-keyword-breakpoints'} + # $rOpts->{'break-at-old-logical-breakpoints'} + # $rOpts->{'break-at-old-ternary-breakpoints'} + # $rOpts->{'break-at-old-attribute-breakpoints'} + } + # frequently used parameters $rOpts_add_newlines = $rOpts->{'add-newlines'}; $rOpts_add_whitespace = $rOpts->{'add-whitespace'}; @@ -5887,6 +6225,10 @@ sub bad_pattern { $word_count++; $rcuddled_block_types->{$start}->{$word} = 1; #"$string_count.$word_count"; + + # git#9: Remove this word from the list of desired one-line + # blocks + $want_one_line_block{$word} = 0; } } return; @@ -6010,6 +6352,24 @@ sub make_closing_side_comment_list_pattern { return; } +sub make_sub_matching_pattern { + + $SUB_PATTERN = '^sub\s+(::|\w)'; + $ASUB_PATTERN = '^sub$'; + + if ( $rOpts->{'sub-alias-list'} ) { + + # Note that any 'sub-alias-list' has been preprocessed to + # be a trimmed, space-separated list which includes 'sub' + # for example, it might be 'sub method fun' + my $sub_alias_list = $rOpts->{'sub-alias-list'}; + $sub_alias_list =~ s/\s+/\|/g; + $SUB_PATTERN =~ s/sub/\($sub_alias_list\)/; + $ASUB_PATTERN =~ s/sub/\($sub_alias_list\)/; + } + return; +} + sub make_bli_pattern { if ( defined( $rOpts->{'brace-left-and-indent-list'} ) @@ -6642,7 +7002,6 @@ EOM $container_environment_to_go[$max_index_to_go] = $container_environment; $ci_levels_to_go[$max_index_to_go] = $ci_level; $mate_index_to_go[$max_index_to_go] = -1; - $matching_token_to_go[$max_index_to_go] = ''; $bond_strength_to_go[$max_index_to_go] = 0; # Note: negative levels are currently retained as a diagnostic so that @@ -6698,38 +7057,6 @@ EOM return; } - sub insert_new_token_to_go { - - # insert a new token into the output stream. use same level as - # previous token; assumes a character at max_index_to_go. - my ( $self, @args ) = @_; - save_current_token(); - ( $token, $type, $slevel, $no_internal_newlines ) = @args; - - if ( $max_index_to_go == UNDEFINED_INDEX ) { - warning("code bug: bad call to insert_new_token_to_go\n"); - } - $level = $levels_to_go[$max_index_to_go]; - - # FIXME: it seems to be necessary to use the next, rather than - # previous, value of this variable when creating a new blank (align.t) - #my $slevel = $nesting_depth_to_go[$max_index_to_go]; - $ci_level = $ci_levels_to_go[$max_index_to_go]; - $container_environment = $container_environment_to_go[$max_index_to_go]; - $in_continued_quote = 0; - $block_type = ""; - $type_sequence = ""; - - # store an undef for the K value to catch unexpected usage - # This routine is only called by add_closing_side_comments, and - # eventually that call will be eliminated. - $Ktoken_vars = undef; - - $self->store_token_to_go(); - restore_current_token(); - return; - } - sub copy_hash { my ($rold_token_hash) = @_; my %new_token_hash = @@ -6807,14 +7134,12 @@ EOM my $rLL = $self->{rLL}; my $rbreak_container = $self->{rbreak_container}; + my $rshort_nested = $self->{rshort_nested}; if ( !defined($K_first) ) { - # Unexpected blank line.. - # Calling routine was supposed to handle this - Warn( -"Programming Error: Unexpected Blank Line in print_line_of_tokens. Ignoring" - ); + # Empty line: This can happen if tokens are deleted, for example + # with the -mangle parameter return; } @@ -6877,8 +7202,6 @@ EOM ###################################### if ($is_comment) { - if ( $rOpts->{'delete-block-comments'} ) { return } - if ( $rOpts->{'tee-block-comments'} ) { $file_writer_object->tee_on(); } @@ -6938,12 +7261,9 @@ EOM return; } - # TODO: Move to sub scan_comments # compare input/output indentation except for continuation lines # (because they have an unknown amount of initial blank space) # and lines which are quotes (because they may have been outdented) - # Note: this test is placed here because we know the continuation flag - # at this point, which allows us to avoid non-meaningful checks. my $structural_indentation_level = $rinput_token_array->[0]->[_LEVEL_]; compare_indentation_levels( $guessed_indentation_level, $structural_indentation_level ) @@ -7055,10 +7375,6 @@ EOM if ( $type eq '#' ) { - # trim trailing whitespace - # (there is no option at present to prevent this) - $token =~ s/\s*$//; - if ( $rOpts->{'delete-side-comments'} @@ -7095,40 +7411,6 @@ EOM $next_nonblank_token_type = $rinput_token_array->[$j_next]->[_TYPE_]; - ###################### - # MAYBE MOVE ELSEWHERE? - ###################### - if ( $type eq 'Q' ) { - note_embedded_tab() if ( $token =~ "\t" ); - - # make note of something like '$var = s/xxx/yyy/;' - # in case it should have been '$var =~ s/xxx/yyy/;' - if ( - $token =~ /^(s|tr|y|m|\/)/ - && $last_nonblank_token =~ /^(=|==|!=)$/ - - # preceded by simple scalar - && $last_last_nonblank_type eq 'i' - && $last_last_nonblank_token =~ /^\$/ - - # followed by some kind of termination - # (but give complaint if we can's see far enough ahead) - && $next_nonblank_token =~ /^[; \)\}]$/ - - # scalar is not declared - && !( - $types_to_go[0] eq 'k' - && $tokens_to_go[0] =~ /^(my|our|local)$/ - ) - ) - { - my $guess = substr( $last_nonblank_token, 0, 1 ) . '~'; - complain( -"Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n" - ); - } - } - # Do not allow breaks which would promote a side comment to a # block comment. In order to allow a break before an opening # or closing BLOCK, followed by a side comment, those sections @@ -7138,11 +7420,13 @@ EOM ( $type eq '{' && $token eq '{' && $block_type + && !$rshort_nested->{$type_sequence} && $block_type ne 't' ); my $is_closing_BLOCK = ( $type eq '}' && $token eq '}' && $block_type + && !$rshort_nested->{$type_sequence} && $block_type ne 't' ); if ( $side_comment_follows @@ -7425,40 +7709,6 @@ EOM destroy_one_line_block(); } - # Remove unnecessary semicolons, but not after bare - # blocks, where it could be unsafe if the brace is - # mistokenized. - if ( - ( - $last_nonblank_token eq '}' - && ( - $is_block_without_semicolon{ - $last_nonblank_block_type} - || $last_nonblank_block_type =~ /$SUB_PATTERN/ - || $last_nonblank_block_type =~ /^\w+:$/ ) - ) - || $last_nonblank_type eq ';' - ) - { - - if ( - $rOpts->{'delete-semicolons'} - - # don't delete ; before a # because it would promote it - # to a block comment - && ( $next_nonblank_token_type ne '#' ) - ) - { - note_deleted_semicolon(); - $self->output_line_to_go() - unless ( $no_internal_newlines - || $index_start_one_line_block != UNDEFINED_INDEX ); - next; - } - else { - write_logfile_entry("Extra ';'\n"); - } - } $self->store_token_to_go(); $self->output_line_to_go() @@ -7562,9 +7812,6 @@ sub output_line_to_go { }; # Do not end line in a weld - # TODO: Move this fix into the routine? - #my $jnb = $max_index_to_go; - #if ( $jnb > 0 && $types_to_go[$jnb] eq 'b' ) { $jnb-- } return if ( weld_len_right_to_go($max_index_to_go) ); # just set a tentative breakpoint if we might be in a one-line block @@ -7573,10 +7820,6 @@ sub output_line_to_go { return; } -## my $cscw_block_comment; -## $cscw_block_comment = $self->add_closing_side_comment() -## if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 ); - my $comma_arrow_count_contained = match_opening_and_closing_tokens(); # tell the -lp option we are outputting a batch so it can close @@ -7640,17 +7883,12 @@ sub output_line_to_go { my $leading_type = $types_to_go[$imin]; # blank lines before subs except declarations and one-liners - # MCONVERSION LOCATION - for sub tokenization change if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) { $want_blank = $rOpts->{'blank-lines-before-subs'} - if ( - terminal_type( \@types_to_go, \@block_type_to_go, $imin, - $imax ) !~ /^[\;\}]$/ - ); + if ( $self->terminal_type_i( $imin, $imax ) !~ /^[\;\}]$/ ); } # break before all package declarations - # MCONVERSION LOCATION - for tokenizaton change elsif ($leading_token =~ /^(package\s)/ && $leading_type eq 'i' ) { @@ -7660,10 +7898,7 @@ sub output_line_to_go { # break before certain key blocks except one-liners if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) { $want_blank = $rOpts->{'blank-lines-before-subs'} - if ( - terminal_type( \@types_to_go, \@block_type_to_go, $imin, - $imax ) ne '}' - ); + if ( $self->terminal_type_i( $imin, $imax ) ne '}' ); } # Break before certain block types if we haven't had a @@ -7686,10 +7921,7 @@ sub output_line_to_go { && $lc >= $rOpts->{'long-block-line-count'} && consecutive_nonblank_lines() >= $rOpts->{'long-block-line-count'} - && ( - terminal_type( \@types_to_go, \@block_type_to_go, $imin, - $imax ) ne '}' - ); + && $self->terminal_type_i( $imin, $imax ) ne '}'; } # Check for blank lines wanted before a closing brace @@ -7802,9 +8034,9 @@ sub output_line_to_go { else { ( $ri_first, $ri_last, my $colon_count ) = - set_continuation_breaks($saw_good_break); + $self->set_continuation_breaks($saw_good_break); - break_all_chain_tokens( $ri_first, $ri_last ); + $self->break_all_chain_tokens( $ri_first, $ri_last ); break_equals( $ri_first, $ri_last ); @@ -7815,7 +8047,7 @@ sub output_line_to_go { recombine_breakpoints( $ri_first, $ri_last ); } - insert_final_breaks( $ri_first, $ri_last ) if $colon_count; + $self->insert_final_breaks( $ri_first, $ri_last ) if $colon_count; } # do corrector step if -lp option is used @@ -7827,8 +8059,50 @@ sub output_line_to_go { if ( $rOpts_one_line_block_semicolons == 0 ) { $self->delete_one_line_semicolons( $ri_first, $ri_last ); } - $self->send_lines_to_vertical_aligner( $ri_first, $ri_last, - $do_not_pad ); + + # The line breaks for this batch of code have been finalized. Now we + # can to package the results for further processing. We will switch + # from the local '_to_go' buffer arrays (i-index) back to the global + # token arrays (K-index) at this point. + my $rlines_K; + my $index_error; + for ( my $n = 0 ; $n < @{$ri_first} ; $n++ ) { + my $ibeg = $ri_first->[$n]; + my $Kbeg = $K_to_go[$ibeg]; + my $iend = $ri_last->[$n]; + my $Kend = $K_to_go[$iend]; + if ( $iend - $ibeg != $Kend - $Kbeg ) { + $index_error = $n unless defined($index_error); + } + push @{$rlines_K}, + [ $Kbeg, $Kend, $forced_breakpoint_to_go[$iend] ]; + } + + # Check correctness of the mapping between the i and K token indexes + if ( defined($index_error) ) { + + # Temporary debug code - should never get here + for ( my $n = 0 ; $n < @{$ri_first} ; $n++ ) { + my $ibeg = $ri_first->[$n]; + my $Kbeg = $K_to_go[$ibeg]; + my $iend = $ri_last->[$n]; + my $Kend = $K_to_go[$iend]; + my $idiff = $iend - $ibeg; + my $Kdiff = $Kend - $Kbeg; + print STDERR < $rlines_K, + do_not_pad => $do_not_pad, + ibeg0 => $ri_first->[0], + }; + + $self->send_lines_to_vertical_aligner($rbatch_hash); # Insert any requested blank lines after an opening brace. We have to # skip back before any side comment to find the terminal token @@ -7855,11 +8129,6 @@ sub output_line_to_go { prepare_for_new_input_lines(); -## # output any new -cscw block comment -## if ($cscw_block_comment) { -## $self->flush(); -## $file_writer_object->write_code_line( $cscw_block_comment . "\n" ); -## } return; } @@ -7880,7 +8149,7 @@ sub note_deleted_semicolon { $first_deleted_semicolon_at = $last_deleted_semicolon_at; } $deleted_semicolon_count++; - write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;) + write_logfile_entry("Deleted unnecessary ';' at line $input_line_number\n"); return; } @@ -7909,6 +8178,7 @@ sub starting_one_line_block { my ( $self, $j, $jmax, $level, $slevel, $ci_level, $rtoken_array ) = @_; my $rbreak_container = $self->{rbreak_container}; + my $rshort_nested = $self->{rshort_nested}; my $jmax_check = @{$rtoken_array}; if ( $jmax_check < $jmax ) { @@ -8028,15 +8298,26 @@ sub starting_one_line_block { if ( $rtoken_array->[$i]->[_TYPE_] eq 'b' ) { $pos += 1 } else { $pos += rtoken_length($i) } + # ignore some small blocks + my $type_sequence = $rtoken_array->[$i]->[_TYPE_SEQUENCE_]; + my $nobreak = $rshort_nested->{$type_sequence}; + # Return false result if we exceed the maximum line length, if ( $pos > maximum_line_length($i_start) ) { return 0; } - # or encounter another opening brace before finding the closing brace. + # keep going for non-containers + elsif ( !$type_sequence ) { + + } + + # return if we encounter another opening brace before finding the + # closing brace. elsif ($rtoken_array->[$i]->[_TOKEN_] eq '{' && $rtoken_array->[$i]->[_TYPE_] eq '{' - && $rtoken_array->[$i]->[_BLOCK_TYPE_] ) + && $rtoken_array->[$i]->[_BLOCK_TYPE_] + && !$nobreak ) { return 0; } @@ -8044,7 +8325,8 @@ sub starting_one_line_block { # if we find our closing brace.. elsif ($rtoken_array->[$i]->[_TOKEN_] eq '}' && $rtoken_array->[$i]->[_TYPE_] eq '}' - && $rtoken_array->[$i]->[_BLOCK_TYPE_] ) + && $rtoken_array->[$i]->[_BLOCK_TYPE_] + && !$nobreak ) { # be sure any trailing comment also fits on the line @@ -8113,7 +8395,7 @@ sub starting_one_line_block { # we keep old one-line blocks but do not form new ones. It is not # always a good idea to make as many one-line blocks as possible, # so other types are not done. The user can always use -mangle. - if ( $is_sort_map_grep_eval{$block_type} ) { + if ( $want_one_line_block{$block_type} ) { create_one_line_block( $i_start, 1 ); } return 0; @@ -8159,7 +8441,7 @@ sub undo_ci { # map { $_, $lookup->{$_} } # sort { $a <=> $b } # grep { $lookup->{$_} ne $default } keys %$lookup ); - my ( $ri_first, $ri_last ) = @_; + my ( $self, $ri_first, $ri_last ) = @_; my ( $line_1, $line_2, $lev_last ); my $this_line_is_semicolon_terminated; my $max_line = @{$ri_first} - 1; @@ -8298,7 +8580,8 @@ sub undo_lp_ci { sub pad_token { # insert $pad_spaces before token number $ipad - my ( $ipad, $pad_spaces ) = @_; + my ( $self, $ipad, $pad_spaces ) = @_; + my $rLL = $self->{rLL}; if ( $pad_spaces > 0 ) { $tokens_to_go[$ipad] = ' ' x $pad_spaces . $tokens_to_go[$ipad]; } @@ -8311,6 +8594,9 @@ sub pad_token { return; } + # Keep token arrays in sync + $self->sync_token_K($ipad); + $token_lengths_to_go[$ipad] += $pad_spaces; foreach my $i ( $ipad .. $max_index_to_go ) { $summed_lengths_to_go[ $i + 1 ] += $pad_spaces; @@ -8342,7 +8628,7 @@ sub pad_token { # &Error_OutOfRange; # } # - my ( $ri_first, $ri_last ) = @_; + my ( $self, $ri_first, $ri_last ) = @_; my $max_line = @{$ri_first} - 1; # FIXME: move these declarations below @@ -8525,7 +8811,7 @@ sub pad_token { # find any unclosed container next unless ( $type_sequence_to_go[$i] - && $mate_index_to_go[$i] > $iend ); + && $self->mate_index_to_go($i) > $iend ); # find next nonblank token to pad $ipad = $inext_to_go[$i]; @@ -8682,11 +8968,7 @@ sub pad_token { my $i2 = $ri_last->[$l]; if ( $types_to_go[$i2] eq '#' ) { my $i1 = $ri_first->[$l]; - next - if ( - terminal_type( \@types_to_go, \@block_type_to_go, - $i1, $i2 ) eq ',' - ); + next if $self->terminal_type_i( $i1, $i2 ) eq ','; } } @@ -8755,7 +9037,7 @@ sub pad_token { if ( $pad_spaces == -1 ) { if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) { - pad_token( $ipad - 1, $pad_spaces ); + $self->pad_token( $ipad - 1, $pad_spaces ); } } $pad_spaces = 0; @@ -8767,7 +9049,7 @@ sub pad_token { my $length_t = total_line_length( $ibeg, $iend ); if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) ) { - pad_token( $ipad, $pad_spaces ); + $self->pad_token( $ipad, $pad_spaces ); } } } @@ -9435,7 +9717,7 @@ sub add_closing_side_comment { my $self = shift; # add closing side comments after closing block braces if -csc used - my $cscw_block_comment; + my ( $closing_side_comment, $cscw_block_comment ); #--------------------------------------------------------------- # Step 1: loop through all tokens of this line to accumulate @@ -9481,7 +9763,7 @@ sub add_closing_side_comment { # ..and the corresponding opening brace must is not in this batch # (because we do not need to tag one-line blocks, although this # should also be caught with a positive -csci value) - && $mate_index_to_go[$i_terminal] < 0 + && $self->mate_index_to_go($i_terminal) < 0 # ..and either && ( @@ -9601,43 +9883,23 @@ sub add_closing_side_comment { } # switch to the new csc (unless we deleted it!) - $tokens_to_go[$max_index_to_go] = $token if $token; + if ($token) { + $tokens_to_go[$max_index_to_go] = $token; + $self->sync_token_K($max_index_to_go); + } } # handle case of NO existing closing side comment else { - # Remove any existing blank and add another below. - # This is a tricky point. A side comment needs to have the same level - # as the preceding closing brace or else the line will not get the right - # indentation. So even if we have a blank, we are going to replace it. - if ( $types_to_go[$max_index_to_go] eq 'b' ) { - unstore_token_to_go(); - } - - # insert the new side comment into the output token stream - my $type = '#'; - my $block_type = ''; - my $type_sequence = ''; - my $container_environment = - $container_environment_to_go[$max_index_to_go]; - my $level = $levels_to_go[$max_index_to_go]; - my $slevel = $nesting_depth_to_go[$max_index_to_go]; - my $no_internal_newlines = 0; - - my $ci_level = $ci_levels_to_go[$max_index_to_go]; - my $in_continued_quote = 0; - - # insert a blank token - $self->insert_new_token_to_go( ' ', 'b', $slevel, - $no_internal_newlines ); - - # then the side comment - $self->insert_new_token_to_go( $token, $type, $slevel, - $no_internal_newlines ); + # To avoid inserting a new token in the token arrays, we + # will just return the new side comment so that it can be + # inserted just before it is needed in the call to the + # vertical aligner. + $closing_side_comment = $token; } } - return $cscw_block_comment; + return ( $closing_side_comment, $cscw_block_comment ); } sub previous_nonblank_token { @@ -9662,62 +9924,131 @@ sub previous_nonblank_token { sub send_lines_to_vertical_aligner { - my ( $self, $ri_first, $ri_last, $do_not_pad ) = @_; + my ( $self, $rbatch_hash ) = @_; - my $valign_batch_number = $self->increment_valign_batch_count(); + # This routine receives a batch of code for which the final line breaks + # have been defined. Here we prepare the lines for passing to the vertical + # aligner. We do the following tasks: + # - mark certain vertical alignment tokens tokens, such as '=', in each line. + # - make minor indentation adjustments + # - insert extra blank spaces to help display certain logical constructions - my $cscw_block_comment; - if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 ) { - $cscw_block_comment = $self->add_closing_side_comment(); + my $rlines_K = $rbatch_hash->{rlines_K}; + if ( !@{$rlines_K} ) { + Fault("Unexpected call with no lines"); + return; + } + my $n_last_line = @{$rlines_K} - 1; + my $do_not_pad = $rbatch_hash->{do_not_pad}; - # Add or update any closing side comment - if ( $types_to_go[$max_index_to_go] eq '#' ) { - $ri_last->[-1] = $max_index_to_go; - } + my $rLL = $self->{rLL}; + my $Klimit = $self->{Klimit}; + + my ( $Kbeg_next, $Kend_next ) = @{ $rlines_K->[0] }; + my $type_beg_next = $rLL->[$Kbeg_next]->[_TYPE_]; + my $token_beg_next = $rLL->[$Kbeg_next]->[_TOKEN_]; + my $type_end_next = $rLL->[$Kend_next]->[_TYPE_]; + + # Construct indexes to the global_to_go arrays so that called routines can + # still access those arrays. This might eventually be removed + # when all called routines have been converted to access token values + # in the rLL array instead. + my $ibeg0 = $rbatch_hash->{ibeg0}; + my $Kbeg0 = $Kbeg_next; + my ( $ri_first, $ri_last ); + foreach my $rline ( @{$rlines_K} ) { + my ( $Kbeg, $Kend ) = @{$rline}; + my $ibeg = $ibeg0 + $Kbeg - $Kbeg0; + my $iend = $ibeg0 + $Kend - $Kbeg0; + push @{$ri_first}, $ibeg; + push @{$ri_last}, $iend; + } + ##################################################################### + + my $valign_batch_number = $self->increment_valign_batch_count(); + + my ( $cscw_block_comment, $closing_side_comment ); + if ( $rOpts->{'closing-side-comments'} ) { + ( $closing_side_comment, $cscw_block_comment ) = + $self->add_closing_side_comment(); } my $rindentation_list = [0]; # ref to indentations for each line - # define the array @matching_token_to_go for the output tokens + # define the array @{$ralignment_type_to_go} for the output tokens # which will be non-blank for each special token (such as =>) # for which alignment is required. - set_vertical_alignment_markers( $ri_first, $ri_last ); - - # flush if necessary to avoid unwanted alignment - my $must_flush = 0; - if ( @{$ri_first} > 1 ) { + my $ralignment_type_to_go = + $self->set_vertical_alignment_markers( $ri_first, $ri_last ); - # flush before a long if statement - if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) { - $must_flush = 1; - } - } - if ($must_flush) { + # flush before a long if statement to avoid unwanted alignment + if ( $n_last_line > 0 + && $type_beg_next eq 'k' + && $token_beg_next =~ /^(if|unless)$/ ) + { Perl::Tidy::VerticalAligner::flush(); } - undo_ci( $ri_first, $ri_last ); + $self->undo_ci( $ri_first, $ri_last ); - set_logical_padding( $ri_first, $ri_last ); + $self->set_logical_padding( $ri_first, $ri_last ); # loop to prepare each line for shipment - my $n_last_line = @{$ri_first} - 1; my $in_comma_list; + my ( $Kbeg, $type_beg, $token_beg ); + my ( $Kend, $type_end ); for my $n ( 0 .. $n_last_line ) { - my $ibeg = $ri_first->[$n]; - my $iend = $ri_last->[$n]; - - my ( $rtokens, $rfields, $rpatterns ) = - make_alignment_patterns( $ibeg, $iend ); - # Set flag to show how much level changes between this line - # and the next line, if we have it. - my $ljump = 0; + my $ibeg = $ri_first->[$n]; + my $iend = $ri_last->[$n]; + my $rline = $rlines_K->[$n]; + my $forced_breakpoint = $rline->[2]; + + # we may need to look at variables on three consecutive lines ... + + # Some vars on line [n-1], if any: + my $Kbeg_last = $Kbeg; + my $type_beg_last = $type_beg; + my $token_beg_last = $token_beg; + my $Kend_last = $Kend; + my $type_end_last = $type_end; + + # Some vars on line [n]: + $Kbeg = $Kbeg_next; + $type_beg = $type_beg_next; + $token_beg = $token_beg_next; + $Kend = $Kend_next; + $type_end = $type_end_next; + + # We use two slightly different definitions of level jump at the end + # of line: + # $ljump is the level jump needed by 'sub set_adjusted_indentation' + # $level_jump is the level jump needed by the vertical aligner. + my $ljump = 0; # level jump at end of line + + # Get some vars on line [n+1], if any: if ( $n < $n_last_line ) { - my $ibegp = $ri_first->[ $n + 1 ]; - $ljump = $levels_to_go[$ibegp] - $levels_to_go[$iend]; + ( $Kbeg_next, $Kend_next ) = + @{ $rlines_K->[ $n + 1 ] }; + $type_beg_next = $rLL->[$Kbeg_next]->[_TYPE_]; + $token_beg_next = $rLL->[$Kbeg_next]->[_TOKEN_]; + $type_end_next = $rLL->[$Kend_next]->[_TYPE_]; + $ljump = $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_]; } + # level jump at end of line for the vertical aligner: + my $level_jump = + $Kend >= $Klimit + ? 0 + : $rLL->[ $Kend + 1 ]->[_SLEVEL_] - $rLL->[$Kbeg]->[_SLEVEL_]; + + $self->delete_needless_alignments( $ibeg, $iend, + $ralignment_type_to_go ); + + my ( $rtokens, $rfields, $rpatterns ) = + $self->make_alignment_patterns( $ibeg, $iend, + $ralignment_type_to_go ); + my ( $indentation, $lev, $level_end, $terminal_type, $is_semicolon_terminated, $is_outdented_line ) = $self->set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns, @@ -9727,11 +10058,11 @@ sub send_lines_to_vertical_aligner { my $outdent_long_lines = ( # which are long quotes, if allowed - ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} ) + ( $type_beg eq 'Q' && $rOpts->{'outdent-long-quotes'} ) # which are long block comments, if allowed || ( - $types_to_go[$ibeg] eq '#' + $type_beg eq '#' && $rOpts->{'outdent-long-comments'} # but not if this is a static block comment @@ -9739,11 +10070,8 @@ sub send_lines_to_vertical_aligner { ) ); - my $level_jump = - $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg]; - my $rvertical_tightness_flags = - set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend, + $self->set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ); # flush an outdented line to avoid any unwanted vertical alignment @@ -9765,45 +10093,54 @@ sub send_lines_to_vertical_aligner { # ); # my $is_terminal_ternary = 0; - if ( $tokens_to_go[$ibeg] eq ':' - || $n > 0 && $tokens_to_go[ $ri_last->[ $n - 1 ] ] eq ':' ) - { - my $last_leading_type = ":"; - if ( $n > 0 ) { - my $iprev = $ri_first->[ $n - 1 ]; - $last_leading_type = $types_to_go[$iprev]; - } + + if ( $type_beg eq ':' || $n > 0 && $type_end_last eq ':' ) { + my $last_leading_type = $n > 0 ? $type_beg_last : ':'; if ( $terminal_type ne ';' && $n_last_line > $n && $level_end == $lev ) { - my $inext = $ri_first->[ $n + 1 ]; - $level_end = $levels_to_go[$inext]; - $terminal_type = $types_to_go[$inext]; + $level_end = $rLL->[$Kbeg_next]->[_LEVEL_]; + $terminal_type = $rLL->[$Kbeg_next]->[_TYPE_]; } + if ( + $last_leading_type eq ':' + && ( ( $terminal_type eq ';' && $level_end <= $lev ) + || ( $terminal_type ne ':' && $level_end < $lev ) ) + ) + { - $is_terminal_ternary = $last_leading_type eq ':' - && ( ( $terminal_type eq ';' && $level_end <= $lev ) - || ( $terminal_type ne ':' && $level_end < $lev ) ) + # the terminal term must not contain any ternary terms, as in + # my $ECHO = ( + # $Is_MSWin32 ? ".\\echo$$" + # : $Is_MacOS ? ":echo$$" + # : ( $Is_NetWare ? "echo$$" : "./echo$$" ) + # ); + $is_terminal_ternary = 1; - # the terminal term must not contain any ternary terms, as in - # my $ECHO = ( - # $Is_MSWin32 ? ".\\echo$$" - # : $Is_MacOS ? ":echo$$" - # : ( $Is_NetWare ? "echo$$" : "./echo$$" ) - # ); - && !grep { /^[\?\:]$/ } @types_to_go[ $ibeg + 1 .. $iend ]; + my $KP = $rLL->[$Kbeg]->[_KNEXT_SEQ_ITEM_]; + while ( defined($KP) && $KP <= $Kend ) { + my $type_KP = $rLL->[$KP]->[_TYPE_]; + if ( $type_KP eq '?' || $type_KP eq ':' ) { + $is_terminal_ternary = 0; + last; + } + $KP = $rLL->[$KP]->[_KNEXT_SEQ_ITEM_]; + } + } } - # send this new line down the pipe - my $forced_breakpoint = $forced_breakpoint_to_go[$iend]; + # add any new closing side comment to the last line + if ( $closing_side_comment && $n == $n_last_line && @{$rfields} ) { + $rfields->[-1] .= " $closing_side_comment"; + } + # send this new line down the pipe my $rvalign_hash = {}; - $rvalign_hash->{level} = $lev; - $rvalign_hash->{level_end} = $level_end; - $rvalign_hash->{indentation} = $indentation; - $rvalign_hash->{is_forced_break} = - $forced_breakpoint_to_go[$iend] || $in_comma_list; + $rvalign_hash->{level} = $lev; + $rvalign_hash->{level_end} = $level_end; + $rvalign_hash->{indentation} = $indentation; + $rvalign_hash->{is_forced_break} = $forced_breakpoint || $in_comma_list; $rvalign_hash->{outdent_long_lines} = $outdent_long_lines; $rvalign_hash->{is_terminal_ternary} = $is_terminal_ternary; $rvalign_hash->{is_terminal_statement} = $is_semicolon_terminated; @@ -9816,8 +10153,7 @@ sub send_lines_to_vertical_aligner { Perl::Tidy::VerticalAligner::valign_input( $rvalign_hash, $rfields, $rtokens, $rpatterns ); - $in_comma_list = - $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend]; + $in_comma_list = $type_end eq ',' && $forced_breakpoint; # flush an outdented line to avoid any unwanted vertical alignment Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); @@ -9836,16 +10172,16 @@ sub send_lines_to_vertical_aligner { $last_output_short_opening_token # line ends in opening token - = $types_to_go[$iend] =~ /^[\{\(\[L]$/ + = $type_end =~ /^[\{\(\[L]$/ # and either && ( # line has either single opening token - $iend == $ibeg + $Kend == $Kbeg # or is a single token followed by opening token. # Note that sub identifiers have blanks like 'sub doit' - || ( $iend - $ibeg <= 2 && $tokens_to_go[$ibeg] !~ /\s+/ ) + || ( $Kend - $Kbeg <= 2 && $token_beg !~ /\s+/ ) ) # and limit total to 10 character widths @@ -9855,7 +10191,7 @@ sub send_lines_to_vertical_aligner { # remember indentation of lines containing opening containers for # later use by sub set_adjusted_indentation - save_opening_indentation( $ri_first, $ri_last, $rindentation_list ); + $self->save_opening_indentation( $ri_first, $ri_last, $rindentation_list ); # output any new -cscw block comment if ($cscw_block_comment) { @@ -9869,6 +10205,7 @@ sub send_lines_to_vertical_aligner { my %block_type_map; my %keyword_map; + my %operator_map; BEGIN { @@ -9898,6 +10235,109 @@ sub send_lines_to_vertical_aligner { # treat an 'undef' similar to numbers and quotes 'undef' => 'Q', ); + + # map certain operators to the same class for pattern matching + %operator_map = ( + '!~' => '=~', + '+=' => '+=', + '-=' => '+=', + '*=' => '+=', + '/=' => '+=', + ); + } + + sub delete_needless_alignments { + my ( $self, $ibeg, $iend, $ralignment_type_to_go ) = @_; + + # Remove unwanted alignments. This routine is a place to remove + # alignments which might cause problems at later stages. There are + # currently two types of fixes: + + # 1. Remove excess parens + # 2. Remove alignments within 'elsif' conditions + + # Patch #1: Excess alignment of parens can prevent other good + # alignments. For example, note the parens in the first two rows of + # the following snippet. They would normally get marked for alignment + # and aligned as follows: + + # my $w = $columns * $cell_w + ( $columns + 1 ) * $border; + # my $h = $rows * $cell_h + ( $rows + 1 ) * $border; + # my $img = new Gimp::Image( $w, $h, RGB ); + + # This causes unnecessary paren alignment and prevents the third equals + # from aligning. If we remove the unwanted alignments we get: + + # my $w = $columns * $cell_w + ( $columns + 1 ) * $border; + # my $h = $rows * $cell_h + ( $rows + 1 ) * $border; + # my $img = new Gimp::Image( $w, $h, RGB ); + + # A rule for doing this which works well is to remove alignment of + # parens whose containers do not contain other aligning tokens, with + # the exception that we always keep alignment of the first opening + # paren on a line (for things like 'if' and 'elsif' statements). + + # Setup needed constants + my $i_good_paren = -1; + my $imin_match = $iend + 1; + my $i_elsif_close = $ibeg - 1; + my $i_elsif_open = $iend + 1; + if ( $iend > $ibeg ) { + if ( $types_to_go[$ibeg] eq 'k' ) { + + # Paren patch: mark a location of a paren we should keep, such + # as one following something like a leading 'if', 'elsif',.. + $i_good_paren = $ibeg + 1; + if ( $types_to_go[$i_good_paren] eq 'b' ) { + $i_good_paren++; + } + + # 'elsif' patch: remember the range of the parens of an elsif, + # and do not make alignments within them because this can cause + # loss of padding and overall brace alignment in the vertical + # aligner. + if ( $tokens_to_go[$ibeg] eq 'elsif' + && $i_good_paren < $iend + && $tokens_to_go[$i_good_paren] eq '(' ) + { + $i_elsif_open = $i_good_paren; + $i_elsif_close = $self->mate_index_to_go($i_good_paren); + } + } + } + + # Loop to make the fixes on this line + my @imatch_list; + for my $i ( $ibeg .. $iend ) { + + if ( $ralignment_type_to_go->[$i] ne '' ) { + + # Patch #2: undo alignment within elsif parens + if ( $i > $i_elsif_open && $i < $i_elsif_close ) { + $ralignment_type_to_go->[$i] = ''; + next; + } + push @imatch_list, $i; + + } + if ( $tokens_to_go[$i] eq ')' ) { + + # Patch #1: undo the corresponding opening paren if: + # - it is at the top of the stack + # - and not the first overall opening paren + # - does not follow a leading keyword on this line + my $imate = $self->mate_index_to_go($i); + if ( @imatch_list + && $imatch_list[-1] eq $imate + && ( $ibeg > 1 || @imatch_list > 1 ) + && $imate > $i_good_paren ) + { + $ralignment_type_to_go->[$imate] = ''; + pop @imatch_list; + } + } + } + return; } sub make_alignment_patterns { @@ -9921,7 +10361,7 @@ sub send_lines_to_vertical_aligner { # @patterns - a modified list of token types, one for each alignment # field. These should normally each match before alignment is # allowed, even when the alignment tokens match. - my ( $ibeg, $iend ) = @_; + my ( $self, $ibeg, $iend, $ralignment_type_to_go ) = @_; my @tokens = (); my @fields = (); my @patterns = (); @@ -9934,6 +10374,7 @@ sub send_lines_to_vertical_aligner { my $j = 0; # field index $patterns[0] = ""; + my %token_count; for my $i ( $ibeg .. $iend ) { # Keep track of containers balanced on this line only. @@ -9944,7 +10385,7 @@ sub send_lines_to_vertical_aligner { if ( $tok =~ /^[\(\{\[]/ ) { #'(' ) { # if container is balanced on this line... - my $i_mate = $mate_index_to_go[$i]; + my $i_mate = $self->mate_index_to_go($i); if ( $i_mate > $i && $i_mate <= $iend ) { $depth++; my $seqno = $type_sequence_to_go[$i]; @@ -10001,15 +10442,22 @@ sub send_lines_to_vertical_aligner { # matches. # if we are not aligning on this paren... - if ( $matching_token_to_go[$i] eq '' ) { - - # Sum length from previous alignment, or start of line. - my $len = - ( $i_start == $ibeg ) - ? total_line_length( $i_start, $i - 1 ) - : token_sequence_length( $i_start, $i - 1 ); + if ( $ralignment_type_to_go->[$i] eq '' ) { + + # Sum length from previous alignment + my $len = token_sequence_length( $i_start, $i - 1 ); + if ( $i_start == $ibeg ) { + + # For first token, use distance from start of line + # but subtract off the indentation due to level. + # Otherwise, results could vary with indentation. + $len += leading_spaces_to_go($ibeg) - + $levels_to_go[$i_start] * $rOpts_indent_columns; + if ( $len < 0 ) { $len = 0 } + } - # tack length onto the container name to make unique + # tack this length onto the container name to try + # to make a unique token name $container_name[$depth] .= "-" . $len; } } @@ -10020,12 +10468,13 @@ sub send_lines_to_vertical_aligner { # if we find a new synchronization token, we are done with # a field - if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) { + if ( $i > $i_start && $ralignment_type_to_go->[$i] ne '' ) { - my $tok = my $raw_tok = $matching_token_to_go[$i]; + my $tok = my $raw_tok = $ralignment_type_to_go->[$i]; # map similar items - if ( $tok eq '!~' ) { $tok = '=~' } + my $tok_map = $operator_map{$tok}; + $tok = $tok_map if ($tok_map); # make separators in different nesting depths unique # by appending the nesting depth digit. @@ -10084,6 +10533,23 @@ sub send_lines_to_vertical_aligner { $tok .= $block_type; } + # Mark multiple copies of certain tokens with the copy number + # This will allow the aligner to decide if they are matched. + # For now, only do this for equals. For example, the two + # equals on the next line will be labeled '=0' and '=0.2'. + # Later, the '=0.2' will be ignored in alignment because it + # has no match. + + # $| = $debug = 1 if $opt_d; + # $full_index = 1 if $opt_i; + + if ( $raw_tok eq '=' || $raw_tok eq '=>' ) { + $token_count{$tok}++; + if ( $token_count{$tok} > 1 ) { + $tok .= '.' . $token_count{$tok}; + } + } + # concatenate the text of the consecutive tokens to form # the field push( @fields, @@ -10242,7 +10708,7 @@ sub send_lines_to_vertical_aligner { # saves indentations of lines of all unmatched opening tokens. # These will be used by sub get_opening_indentation. - my ( $ri_first, $ri_last, $rindentation_list ) = @_; + my ( $self, $ri_first, $ri_last, $rindentation_list ) = @_; # we no longer need indentations of any saved indentations which # are unmatched closing tokens in this batch, because we will @@ -10287,7 +10753,7 @@ sub get_opening_indentation { # which matches the token at index $i_opening # -and its offset (number of columns) from the start of the line # - my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_; + my ( $self, $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_; # first, see if the opening token is in the current batch my $i_opening = $mate_index_to_go[$i_closing]; @@ -10354,6 +10820,11 @@ sub lookup_opening_indentation { my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_; + if ( !@{$ri_last} ) { + warning("Error in opening_indentation: no lines"); + return; + } + my $nline = $rindentation_list->[0]; # line number of previous lookup # reset line location if necessary @@ -10413,7 +10884,7 @@ sub lookup_opening_indentation { # we need to know the last token of this line my ( $terminal_type, $i_terminal ) = - terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend ); + $self->terminal_type_i( $ibeg, $iend ); my $is_outdented_line = 0; @@ -10502,8 +10973,8 @@ sub lookup_opening_indentation { $opening_indentation, $opening_offset, $is_leading, $opening_exists ) - = get_opening_indentation( $ibeg_weld_fix, $ri_first, $ri_last, - $rindentation_list ); + = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first, + $ri_last, $rindentation_list ); # First set the default behavior: if ( @@ -10568,13 +11039,20 @@ sub lookup_opening_indentation { # undo continuation indentation of a terminal closing token if # it is the last token before a level decrease. This will allow # a closing token to line up with its opening counterpart, and - # avoids a indentation jump larger than 1 level. + # avoids an indentation jump larger than 1 level. if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/ && $i_terminal == $ibeg && defined($K_beg) ) { my $K_next_nonblank = $self->K_next_code($K_beg); - if ( defined($K_next_nonblank) ) { + + # Patch for RT#131115: honor -bli flag at closing brace + my $is_bli = + $rOpts_brace_left_and_indent + && $block_type_to_go[$i_terminal] + && $block_type_to_go[$i_terminal] =~ /$bli_pattern/o; + + if ( !$is_bli && defined($K_next_nonblank) ) { my $lev = $rLL->[$K_beg]->[_LEVEL_]; my $level_next = $rLL->[$K_next_nonblank]->[_LEVEL_]; $adjust_indentation = 1 if ( $level_next < $lev ); @@ -10594,8 +11072,8 @@ sub lookup_opening_indentation { $opening_indentation, $opening_offset, $is_leading, $opening_exists ) - = get_opening_indentation( $ibeg, $ri_first, $ri_last, - $rindentation_list ); + = $self->get_opening_indentation( $ibeg, $ri_first, + $ri_last, $rindentation_list ); my $indentation = $leading_spaces_to_go[$ibeg]; if ( defined($opening_indentation) && get_spaces($indentation) > @@ -10618,7 +11096,7 @@ sub lookup_opening_indentation { $opening_indentation, $opening_offset, $is_leading, $opening_exists ) - = get_opening_indentation( $ibeg, $ri_first, $ri_last, + = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last, $rindentation_list ); my $indentation = $leading_spaces_to_go[$ibeg]; if ( defined($opening_indentation) @@ -10693,7 +11171,7 @@ sub lookup_opening_indentation { $opening_indentation, $opening_offset, $is_leading, $opening_exists ) - = get_opening_indentation( $ibeg, $ri_first, $ri_last, + = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last, $rindentation_list ); if ($is_leading) { $adjust_indentation = 2; } } @@ -10876,11 +11354,10 @@ sub lookup_opening_indentation { # updated per bug report in alex_bug.pl: we must not # mess with the indentation of closing logical braces so # we must treat something like '} else {' as if it were - # an isolated brace my $is_isolated_block_brace = ( - # $iend == $ibeg ) && $block_type_to_go[$ibeg]; + # an isolated brace ############################################################# my $is_isolated_block_brace = $block_type_to_go[$ibeg] - && ( $iend == $ibeg + && ( $i_terminal == $ibeg || $is_if_elsif_else_unless_while_until_for_foreach{ $block_type_to_go[$ibeg] } ); @@ -10958,9 +11435,69 @@ sub lookup_opening_indentation { } } +sub mate_index_to_go { + my ( $self, $i ) = @_; + + # Return the matching index of a container or ternary pair + # This is equivalent to the array @mate_index_to_go + my $K = $K_to_go[$i]; + my $K_mate = $self->K_mate_index($K); + my $i_mate = -1; + if ( defined($K_mate) ) { + $i_mate = $i + ( $K_mate - $K ); + if ( $i_mate < 0 || $i_mate > $max_index_to_go ) { + $i_mate = -1; + } + } + my $i_mate_alt = $mate_index_to_go[$i]; + + # Debug code to eventually be removed + if ( 0 && $i_mate_alt != $i_mate ) { + my $tok = $tokens_to_go[$i]; + my $type = $types_to_go[$i]; + my $tok_mate = '*'; + my $type_mate = '*'; + if ( $i_mate >= 0 && $i_mate <= $max_index_to_go ) { + $tok_mate = $tokens_to_go[$i_mate]; + $type_mate = $types_to_go[$i_mate]; + } + my $seq = $type_sequence_to_go[$i]; + my $file = $logger_object->get_input_stream_name(); + + Warn( +"mate_index: file '$file': i=$i, imate=$i_mate, should be $i_mate_alt, K=$K, K_mate=$K_mate\ntype=$type, tok=$tok, seq=$seq, max=$max_index_to_go, tok_mate=$tok_mate, type_mate=$type_mate" + ); + } + return $i_mate; +} + +sub K_mate_index { + + # Given the index K of an opening or closing container, or ?/: ternary pair, + # return the index K of the other member of the pair. + my ( $self, $K ) = @_; + return unless defined($K); + my $rLL = $self->{rLL}; + my $seqno = $rLL->[$K]->[_TYPE_SEQUENCE_]; + return unless ($seqno); + + my $K_opening = $self->{K_opening_container}->{$seqno}; + if ( defined($K_opening) ) { + if ( $K != $K_opening ) { return $K_opening } + return $self->{K_closing_container}->{$seqno}; + } + + $K_opening = $self->{K_opening_ternary}->{$seqno}; + if ( defined($K_opening) ) { + if ( $K != $K_opening ) { return $K_opening } + return $self->{K_closing_ternary}->{$seqno}; + } + return; +} + sub set_vertical_tightness_flags { - my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_; + my ( $self, $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_; # Define vertical tightness controls for the nth line of a batch. # We create an array of parameters which tell the vertical aligner @@ -11164,10 +11701,8 @@ sub set_vertical_tightness_flags { my $is_semicolon_terminated; if ( $n + 1 == $n_last_line ) { - my ( $terminal_type, $i_terminal ) = terminal_type( - \@types_to_go, \@block_type_to_go, - $ibeg_next, $iend_next - ); + my ( $terminal_type, $i_terminal ) = + $self->terminal_type_i( $ibeg_next, $iend_next ); $is_semicolon_terminated = $terminal_type eq ';' && $nesting_depth_to_go[$iend_next] < $nesting_depth_to_go[$ibeg_next]; @@ -11251,8 +11786,10 @@ sub get_seqno { { my %is_vertical_alignment_type; + my %is_not_vertical_alignment_token; my %is_vertical_alignment_keyword; my %is_terminal_alignment_type; + my %is_low_level_alignment_token; BEGIN { @@ -11265,10 +11802,19 @@ sub get_seqno { #; @is_vertical_alignment_type{@q} = (1) x scalar(@q); - # only align these at end of line + # These 'tokens' are not aligned. We need this to remove [ + # from the above list because it has type ='{' + @q = qw([); + @is_not_vertical_alignment_token{@q} = (1) x scalar(@q); + + # these are the only types aligned at a line end @q = qw(&& ||); @is_terminal_alignment_type{@q} = (1) x scalar(@q); + # these tokens only align at line level + @q = ( '{', '(' ); + @is_low_level_alignment_token{@q} = (1) x scalar(@q); + # eq and ne were removed from this list to improve alignment chances @q = qw(if unless and or err for foreach while until); @is_vertical_alignment_keyword{@q} = (1) x scalar(@q); @@ -11281,17 +11827,19 @@ sub get_seqno { # vertical alignment markers (such as an '='). # # Method: We look at each token $i in this output batch and set - # $matching_token_to_go[$i] equal to those tokens at which we would + # $ralignment_type_to_go->[$i] equal to those tokens at which we would # accept vertical alignment. - my ( $ri_first, $ri_last ) = @_; + my ( $self, $ri_first, $ri_last ) = @_; + + my $ralignment_type_to_go; + for my $i ( 0 .. $max_index_to_go ) { + $ralignment_type_to_go->[$i] = ''; + } # nothing to do if we aren't allowed to change whitespace if ( !$rOpts_add_whitespace ) { - for my $i ( 0 .. $max_index_to_go ) { - $matching_token_to_go[$i] = ''; - } - return; + return $ralignment_type_to_go; } # remember the index of last nonblank token before any sidecomment @@ -11318,17 +11866,19 @@ sub get_seqno { $vert_last_nonblank_block_type = ''; # look at each token in this output line.. - my $count = 0; + my $level_beg = $levels_to_go[$ibeg]; foreach my $i ( $ibeg .. $iend ) { my $alignment_type = ''; my $type = $types_to_go[$i]; my $block_type = $block_type_to_go[$i]; my $token = $tokens_to_go[$i]; - # check for flag indicating that we should not align - # this token - if ( $matching_token_to_go[$i] ) { - $matching_token_to_go[$i] = ''; + # do not align tokens at lower level then start of line + # except for side comments + if ( $levels_to_go[$i] < $levels_to_go[$ibeg] + && $types_to_go[$i] ne '#' ) + { + $ralignment_type_to_go->[$i] = ''; next; } @@ -11383,7 +11933,9 @@ sub get_seqno { # align before one of these types.. # Note: add '.' after new vertical aligner is operational - elsif ( $is_vertical_alignment_type{$type} ) { + elsif ( $is_vertical_alignment_type{$type} + && !$is_not_vertical_alignment_token{$token} ) + { $alignment_type = $token; # Do not align a terminal token. Although it might @@ -11409,20 +11961,38 @@ sub get_seqno { # $code = # ( $case_matters ? $accessor : " lc($accessor) " ) # . ( $yesno ? " eq " : " ne " ) + + # Also, do not align a ( following a leading ? so we can + # align something like this: + # $converter{$_}->{ushortok} = + # $PDL::IO::Pic::biggrays + # ? ( m/GIF/ ? 0 : 1 ) + # : ( m/GIF|RAST|IFF/ ? 0 : 1 ); if ( $i == $ibeg + 2 - && $types_to_go[$ibeg] =~ /^[\.\:]$/ + && $types_to_go[$ibeg] =~ /^[\.\:\?]$/ && $types_to_go[ $i - 1 ] eq 'b' ) { $alignment_type = ""; } + # Certain tokens only align at the same level as the + # initial line level + if ( $is_low_level_alignment_token{$token} + && $levels_to_go[$i] != $level_beg ) + { + $alignment_type = ""; + } + # For a paren after keyword, only align something like this: # if ( $a ) { &a } # elsif ( $b ) { &b } - if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) { - $alignment_type = "" - unless $vert_last_nonblank_token =~ - /^(if|unless|elsif)$/; + if ( $token eq '(' ) { + + if ( $vert_last_nonblank_type eq 'k' ) { + $alignment_type = "" + unless $vert_last_nonblank_token =~ + /^(if|unless|elsif)$/; + } } # be sure the alignment tokens are unique @@ -11470,25 +12040,10 @@ sub get_seqno { $alignment_type = $vert_last_nonblank_type; } - #-------------------------------------------------------- - # patch for =~ operator. We only align this if it - # is the first operator in a line, and the line is a simple - # statement. Aligning them within a statement - # interferes could interfere with other good alignments. - #-------------------------------------------------------- - if ( $alignment_type eq '=~' ) { - my $terminal_type = $types_to_go[$i_terminal]; - if ( $count > 0 || $max_line > 0 || $terminal_type ne ';' ) - { - $alignment_type = ""; - } - } - #-------------------------------------------------------- # then store the value #-------------------------------------------------------- - $matching_token_to_go[$i] = $alignment_type; - $count++ if ($alignment_type); + $ralignment_type_to_go->[$i] = $alignment_type; if ( $type ne 'b' ) { $vert_last_nonblank_type = $type; $vert_last_nonblank_token = $token; @@ -11496,50 +12051,106 @@ sub get_seqno { } } } - return; + return $ralignment_type_to_go; } } -sub terminal_type { +sub terminal_type_i { # returns type of last token on this line (terminal token), as follows: # returns # for a full-line comment # returns ' ' for a blank line # otherwise returns final token type - my ( $rtype, $rblock_type, $ibeg, $iend ) = @_; + my ( $self, $ibeg, $iend ) = @_; - # check for full-line comment.. - if ( $rtype->[$ibeg] eq '#' ) { - return wantarray ? ( $rtype->[$ibeg], $ibeg ) : $rtype->[$ibeg]; + # Start at the end and work backwards + my $i = $iend; + my $type_i = $types_to_go[$i]; + + # Check for side comment + if ( $type_i eq '#' ) { + $i--; + if ( $i < $ibeg ) { + return wantarray ? ( $type_i, $ibeg ) : $type_i; + } + $type_i = $types_to_go[$i]; } - else { - # start at end and walk backwards.. - for ( my $i = $iend ; $i >= $ibeg ; $i-- ) { + # Skip past a blank + if ( $type_i eq 'b' ) { + $i--; + if ( $i < $ibeg ) { + return wantarray ? ( $type_i, $ibeg ) : $type_i; + } + $type_i = $types_to_go[$i]; + } + + # Found it..make sure it is a BLOCK termination, + # but hide a terminal } after sort/grep/map because it is not + # necessarily the end of the line. (terminal.t) + my $block_type = $block_type_to_go[$i]; + if ( + $type_i eq '}' + && ( !$block_type + || ( $is_sort_map_grep_eval_do{$block_type} ) ) + ) + { + $type_i = 'b'; + } + return wantarray ? ( $type_i, $i ) : $type_i; +} + +sub terminal_type_K { - # skip past any side comment and blanks - next if ( $rtype->[$i] eq 'b' ); - next if ( $rtype->[$i] eq '#' ); + # returns type of last token on this line (terminal token), as follows: + # returns # for a full-line comment + # returns ' ' for a blank line + # otherwise returns final token type - # found it..make sure it is a BLOCK termination, - # but hide a terminal } after sort/grep/map because it is not - # necessarily the end of the line. (terminal.t) - my $terminal_type = $rtype->[$i]; - if ( - $terminal_type eq '}' - && ( !$rblock_type->[$i] - || ( $is_sort_map_grep_eval_do{ $rblock_type->[$i] } ) ) - ) - { - $terminal_type = 'b'; - } - return wantarray ? ( $terminal_type, $i ) : $terminal_type; + my ( $self, $Kbeg, $Kend ) = @_; + my $rLL = $self->{rLL}; + + if ( !defined($Kend) ) { + Fault("Error in terminal_type_K: Kbeg=$Kbeg > $Kend=Kend"); + } + + # Start at the end and work backwards + my $K = $Kend; + my $type_K = $rLL->[$K]->[_TYPE_]; + + # Check for side comment + if ( $type_K eq '#' ) { + $K--; + if ( $K < $Kbeg ) { + return wantarray ? ( $type_K, $Kbeg ) : $type_K; } + $type_K = $rLL->[$K]->[_TYPE_]; + } - # empty line - return wantarray ? ( ' ', $ibeg ) : ' '; + # Skip past a blank + if ( $type_K eq 'b' ) { + $K--; + if ( $K < $Kbeg ) { + return wantarray ? ( $type_K, $Kbeg ) : $type_K; + } + $type_K = $rLL->[$K]->[_TYPE_]; } + + # found it..make sure it is a BLOCK termination, + # but hide a terminal } after sort/grep/map because it is not + # necessarily the end of the line. (terminal.t) + my $block_type = $rLL->[$K]->[_BLOCK_TYPE_]; + if ( + $type_K eq '}' + && ( !$block_type + || ( $is_sort_map_grep_eval_do{$block_type} ) ) + ) + { + $type_K = 'b'; + } + return wantarray ? ( $type_K, $K ) : $type_K; + } { # set_bond_strengths @@ -14272,7 +14883,6 @@ sub find_token_starting_list { my $i_break = $rcomma_index->[0]; set_forced_breakpoint($i_break); ${$rdo_not_break_apart} = 1; - set_non_alignment_flags( $comma_count, $rcomma_index ); return; } @@ -14305,7 +14915,6 @@ sub find_token_starting_list { ${$rdo_not_break_apart} = 1; } } - set_non_alignment_flags( $comma_count, $rcomma_index ); return; } @@ -14405,7 +15014,6 @@ sub find_token_starting_list { ${$rdo_not_break_apart} = 1; } } - set_non_alignment_flags( $comma_count, $rcomma_index ); } return; } @@ -14432,17 +15040,6 @@ sub find_token_starting_list { } } -sub set_non_alignment_flags { - - # set flag which indicates that these commas should not be - # aligned - my ( $comma_count, $rcomma_index ) = @_; - foreach ( 0 .. $comma_count - 1 ) { - $matching_token_to_go[ $rcomma_index->[$_] ] = 1; - } - return; -} - sub study_list_complexity { # Look for complex tables which should be formatted with one term per line. @@ -14819,6 +15416,21 @@ sub undo_forced_breakpoint_stack { return; } +sub sync_token_K { + my ( $self, $i ) = @_; + + # Keep tokens in the rLL array in sync with the _to_go array + my $rLL = $self->{rLL}; + my $K = $K_to_go[$i]; + if ( defined($K) ) { + $rLL->[$K]->[_TOKEN_] = $tokens_to_go[$i]; + } + else { + # shouldn't happen + } + return; +} + { # begin recombine_breakpoints my %is_amp_amp; @@ -14942,6 +15554,7 @@ sub undo_forced_breakpoint_stack { if ( $types_to_go[$i] eq ';' && $tokens_to_go[$i] eq '' ) { $tokens_to_go[$i] = $want_left_space{';'} == WS_NO ? ';' : ' ;'; + $self->sync_token_K($i); my $line_number = 1 + $self->get_old_line_index( $K_to_go[$i] ); note_added_semicolon($line_number); @@ -15777,17 +16390,20 @@ sub undo_forced_breakpoint_stack { unless ( $this_line_is_semicolon_terminated && ( + $type_ibeg_1 eq '}' + || ( - # following 'if' or 'unless' or 'or' - $type_ibeg_1 eq 'k' - && $is_if_unless{ $tokens_to_go[$ibeg_1] } - - # important: only combine a very simple or - # statement because the step below may have - # combined a trailing 'and' with this or, - # and we do not want to then combine - # everything together - && ( $iend_2 - $ibeg_2 <= 7 ) + # following 'if' or 'unless' or 'or' + $type_ibeg_1 eq 'k' + && $is_if_unless{ $tokens_to_go[$ibeg_1] } + + # important: only combine a very simple or + # statement because the step below may have + # combined a trailing 'and' with this or, + # and we do not want to then combine + # everything together + && ( $iend_2 - $ibeg_2 <= 7 ) + ) ) ); @@ -15980,7 +16596,7 @@ sub break_all_chain_tokens { # statement. If we see a break at any one, break at all similar tokens # within the same container. # - my ( $ri_left, $ri_right ) = @_; + my ( $self, $ri_left, $ri_right ) = @_; my %saw_chain_type; my %left_chain_type; @@ -16052,7 +16668,7 @@ sub break_all_chain_tokens { if ( $left_chain_type{$type} ) { next if $nobreak_to_go[ $itest - 1 ]; foreach my $i ( @{ $left_chain_type{$type} } ) { - next unless in_same_container( $i, $itest ); + next unless $self->in_same_container_i( $i, $itest ); push @insert_list, $itest - 1; # Break at matching ? if this : is at a different level. @@ -16079,7 +16695,7 @@ sub break_all_chain_tokens { if ( $right_chain_type{$type} ) { next if $nobreak_to_go[$itest]; foreach my $i ( @{ $right_chain_type{$type} } ) { - next unless in_same_container( $i, $itest ); + next unless $self->in_same_container_i( $i, $itest ); push @insert_list, $itest; # break at matching ? if this : is at a different level @@ -16218,7 +16834,7 @@ sub break_equals { sub insert_final_breaks { - my ( $ri_left, $ri_right ) = @_; + my ( $self, $ri_left, $ri_right ) = @_; my $nmax = @{$ri_right} - 1; @@ -16237,7 +16853,7 @@ sub insert_final_breaks { } # For long ternary chains, - # if the first : we see has its # ? is in the interior + # if the first : we see has its ? is in the interior # of a preceding line, then see if there are any good # breakpoints before the ?. if ( $i_first_colon > 0 ) { @@ -16264,20 +16880,12 @@ sub insert_final_breaks { $type eq ',' || $type eq 'k' && ( $nmax > 1 && $token eq 'return' ) ) - && in_same_container( $ii, $i_question ) + && $self->in_same_container_i( $ii, $i_question ) ) { push @insert_list, $ii; last; } - -## # For now, a good break is either a comma or a 'return'. -## if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' ) -## && in_same_container( $ii, $i_question ) ) -## { -## push @insert_list, $ii; -## last; -## } } # insert any new break points @@ -16289,42 +16897,84 @@ sub insert_final_breaks { return; } -sub in_same_container { +sub in_same_container_i { # check to see if tokens at i1 and i2 are in the # same container, and not separated by a comma, ? or : - # FIXME: this can be written more efficiently now - my ( $i1, $i2 ) = @_; - my $type = $types_to_go[$i1]; - my $depth = $nesting_depth_to_go[$i1]; - return unless ( $nesting_depth_to_go[$i2] == $depth ); - if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) } - - ########################################################### - # This is potentially a very slow routine and not critical. - # For safety just give up for large differences. - # See test file 'infinite_loop.txt' - # TODO: replace this loop with a data structure - ########################################################### - return if ( $i2 - $i1 > 200 ); - - foreach my $i ( $i1 + 1 .. $i2 - 1 ) { - next if ( $nesting_depth_to_go[$i] > $depth ); - return if ( $nesting_depth_to_go[$i] < $depth ); - - my $tok = $tokens_to_go[$i]; - $tok = ',' if $tok eq '=>'; # treat => same as , + # This is an interface between the _to_go arrays to the rLL array + my ( $self, $i1, $i2 ) = @_; + return $self->in_same_container_K( $K_to_go[$i1], $K_to_go[$i2] ); +} +{ # sub in_same_container_K + my $ris_break_token; + my $ris_comma_token; + + BEGIN { + + # all cases break on seeing commas at same level + my @q = qw( => ); + push @q, ','; + @{$ris_comma_token}{@q} = (1) x scalar(@q); + + # Non-ternary text also breaks on seeing any of qw(? : || or ) # Example: we would not want to break at any of these .'s # : "$str" - if ( $type ne ':' ) { - return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or'; + push @q, qw( or || ? : ); + @{$ris_break_token}{@q} = (1) x scalar(@q); + } + + sub in_same_container_K { + + # Check to see if tokens at K1 and K2 are in the same container, + # and not separated by certain characters: => , ? : || or + # This version uses the newer $rLL data structure + + my ( $self, $K1, $K2 ) = @_; + if ( $K2 < $K1 ) { ( $K1, $K2 ) = ( $K2, $K1 ) } + my $rLL = $self->{rLL}; + my $depth_1 = $rLL->[$K1]->[_SLEVEL_]; + return if ( $depth_1 < 0 ); + return unless ( $rLL->[$K2]->[_SLEVEL_] == $depth_1 ); + + # Select character set to scan for + my $type_1 = $rLL->[$K1]->[_TYPE_]; + my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token; + + # Fast preliminary loop to verify that tokens are in the same container + my $KK = $K1; + while (1) { + $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_]; + last if !defined($KK); + last if ( $KK >= $K2 ); + my $depth_K = $rLL->[$KK]->[_SLEVEL_]; + return if ( $depth_K < $depth_1 ); + next if ( $depth_K > $depth_1 ); + if ( $type_1 ne ':' ) { + my $tok_K = $rLL->[$KK]->[_TOKEN_]; + return if ( $tok_K eq '?' || $tok_K eq ':' ); + } } - else { - return if ( $tok =~ /^[\,]$/ ); + + # Slow loop checking for certain characters + + ########################################################### + # This is potentially a slow routine and not critical. + # For safety just give up for large differences. + # See test file 'infinite_loop.txt' + ########################################################### + return if ( $K2 - $K1 > 200 ); + + foreach my $K ( $K1 + 1 .. $K2 - 1 ) { + + my $depth_K = $rLL->[$K]->[_SLEVEL_]; + next if ( $depth_K > $depth_1 ); + return if ( $depth_K < $depth_1 ); # redundant, checked above + my $tok = $rLL->[$K]->[_TOKEN_]; + return if ( $rbreak->{$tok} ); } + return 1; } - return 1; } sub set_continuation_breaks { @@ -16357,10 +17007,10 @@ sub set_continuation_breaks { # may be updated to be =1 for any index $i after which there must be # a break. This signals later routines not to undo the breakpoint. - my $saw_good_break = shift; - my @i_first = (); # the first index to output - my @i_last = (); # the last index to output - my @i_colon_breaks = (); # needed to decide if we have to break at ?'s + my ( $self, $saw_good_break ) = @_; + my @i_first = (); # the first index to output + my @i_last = (); # the last index to output + my @i_colon_breaks = (); # needed to decide if we have to break at ?'s if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 } set_bond_strengths(); @@ -16369,7 +17019,7 @@ sub set_continuation_breaks { my $imax = $max_index_to_go; if ( $types_to_go[$imin] eq 'b' ) { $imin++ } if ( $types_to_go[$imax] eq 'b' ) { $imax-- } - my $i_begin = $imin; # index for starting next iteration + my $i_begin = $imin; # index for starting next iteration my $leading_spaces = leading_spaces_to_go($imin); my $line_count = 0; @@ -16532,7 +17182,7 @@ sub set_continuation_breaks { # RT #104427: Dont break before opening sub brace because # sub block breaks handled at higher level, unless - # it looks like the preceeding list is long and broken + # it looks like the preceding list is long and broken && !( $next_nonblank_block_type =~ /^sub\b/ && ( $nesting_depth_to_go[$i_begin] == @@ -16786,11 +17436,9 @@ sub set_continuation_breaks { # do not break if statement is broken by side comment next - if ( - $tokens_to_go[$max_index_to_go] eq '#' - && terminal_type( \@types_to_go, \@block_type_to_go, 0, - $max_index_to_go ) !~ /^[\;\}]$/ - ); + if ( $tokens_to_go[$max_index_to_go] eq '#' + && $self->terminal_type_i( 0, $max_index_to_go ) !~ + /^[\;\}]$/ ); # no break needed if matching : is also on the line next @@ -17036,4 +17684,3 @@ sub compare_indentation_levels { return; } 1; -