X-Git-Url: https://git.donarmstrong.com/perltidy.git?a=blobdiff_plain;f=lib%2FPerl%2FTidy%2FVerticalAligner.pm;h=7efabb58f1b22e59e5649fcfa40a1530625e2803;hb=7f27e55dce5925b2bbe8fcfca64f385e917a52be;hp=2ae6e19bab5781dc7abdfd18a423b754c53c8c97;hpb=657098da8da16dccd551721ffc180956d8aab7fc;p=perltidy.git diff --git a/lib/Perl/Tidy/VerticalAligner.pm b/lib/Perl/Tidy/VerticalAligner.pm index 2ae6e19..7efabb5 100644 --- a/lib/Perl/Tidy/VerticalAligner.pm +++ b/lib/Perl/Tidy/VerticalAligner.pm @@ -1,7 +1,7 @@ package Perl::Tidy::VerticalAligner; use strict; use warnings; -our $VERSION = '20190601'; +our $VERSION = '20200110'; use Perl::Tidy::VerticalAligner::Alignment; use Perl::Tidy::VerticalAligner::Line; @@ -1199,8 +1199,6 @@ sub fix_terminal_else { my $jmax = @{$rfields} - 1; return unless ( $jmax > 0 ); - #my $old_line = $group_lines[-1]; - # check for balanced else block following if/elsif/unless my $rfields_old = $old_line->get_rfields(); @@ -1627,8 +1625,11 @@ sub salvage_equality_matches { # If we had a peek at the subsequent line we could make a much better # decision here, but for now this is not available. for ( my $j = 1 ; $j < $jmax_new - 1 ; $j++ ) { - my $new_tok = $rtokens->[$j]; - my $is_good_alignment = ( $new_tok =~ /^(=|\?|if|unless|\|\||\&\&)/ ); + my $new_tok = $rtokens->[$j]; + + # git#16: do not consider fat commas as good aligmnents here + my $is_good_alignment = + ( $new_tok =~ /^(=|\?|if|unless|\|\||\&\&)/ && $new_tok !~ /^=>/ ); return if ($is_good_alignment); } @@ -2052,8 +2053,6 @@ sub my_flush { my @new_lines = @group_lines; initialize_for_new_group(); - ##my $has_terminal_ternary = $new_lines[-1]->{_is_terminal_ternary}; - # remove unmatched tokens in all lines delete_unmatched_tokens( \@new_lines ); @@ -2098,10 +2097,12 @@ sub my_flush { # BEFORE this line unless both it and the previous line have side # comments. This prevents this line from pushing side coments out # to the right. - ##elsif ( $new_line->get_jmax() == 1 ) { elsif ( $new_line->get_jmax() == 1 && !$keep_group_intact ) { - # There are no matching tokens, so now check side comments: + # There are no matching tokens, so now check side comments. + # Programming note: accessing arrays with index -1 is + # risky in Perl, but we have verified there is at least one + # line in the group and that there is at least one field. my $prev_comment = $group_lines[-1]->get_rfields()->[-1]; my $side_comment = $new_line->get_rfields()->[-1]; my_flush_code() unless ( $side_comment && $prev_comment ); @@ -2270,6 +2271,38 @@ EOM return; } +sub decode_alignment_token { + + # Unpack the values packed in an alignment token + # + # Usage: + # my ( $raw_tok, $lev, $tag, $tok_count ) = + # decode_alignment_token($token); + + # Alignment tokens have a trailing decimal level and optional tag (for + # commas): + # For example, the first comma in the following line + # sub banner { crlf; report( shift, '/', shift ); crlf } + # is decorated as follows: + # ,2+report-6 => (tok,lev,tag) =qw( , 2 +report-6) + + # An optional token count may be appended with a leading dot. + # Currently this is only done for '=' tokens but this could change. + # For example, consider the following line: + # $nport = $port = shift || $name; + # The first '=' may either be '=0' or '=0.1' [level 0, first equals] + # The second '=' will be '=0.2' [level 0, second equals] + my ($tok) = @_; + my ( $raw_tok, $lev, $tag, $tok_count ) = ( $tok, 0, "", 1 ); + if ( $tok =~ /^(\D+)(\d+)([^\.]*)(\.(\d+))?$/ ) { + $raw_tok = $1; + $lev = $2; + $tag = $3 if ($3); + $tok_count = $5 if ($5); + } + return ( $raw_tok, $lev, $tag, $tok_count ); +} + { # sub is_deletable_token my %is_deletable_equals; @@ -2287,33 +2320,28 @@ EOM sub is_deletable_token { - # Determine if an token with no match possibility can be removed to + # Determine if a token with no match possibility can be removed to # improve chances of making an alignment. my ( $token, $i, $imax, $jline, $i_eq ) = @_; - # Strip off the level and other stuff appended to the token. - # Tokens have a trailing decimal level and optional tag (for commas): - # For example, the first comma in the following line - # sub banner { crlf; report( shift, '/', shift ); crlf } - # is decorated as follows: - # ,2+report-6 => (tok,lev,tag) =qw( , 2 +report-6) - my ( $tok, $lev, $tag ) = ( $token, 0, "" ); - if ( $tok =~ /^(\D+)(\d+)(.*)$/ ) { $tok = $1; $lev = $2; $tag = $3 } - ##print "$token >> $tok $lev $tag\n"; + my ( $raw_tok, $lev, $tag, $tok_count ) = + decode_alignment_token($token); + + # okay to delete second and higher copies of a token + if ( $tok_count > 1 ) { return 1 } # only remove lower level commas - ##if ( $tok eq ',' ) { return unless $lev > $group_level; } - if ( $tok eq ',' ) { + if ( $raw_tok eq ',' ) { - #print "tok=$tok, lev=$lev, gl=$group_level, i=$i, ieq=$i_eq\n"; return if ( defined($i_eq) && $i < $i_eq ); - return if ( $lev >= $group_level ); + return if ( $lev <= $group_level ); } # most operators with an equals sign should be retained if at # same level as this statement - elsif ( $tok =~ /=/ ) { - return unless ( $lev > $group_level || $is_deletable_equals{$tok} ); + elsif ( $raw_tok =~ /=/ ) { + return + unless ( $lev > $group_level || $is_deletable_equals{$raw_tok} ); } # otherwise, ok to delete the token @@ -2324,46 +2352,62 @@ EOM sub delete_unmatched_tokens { my ($rlines) = @_; - # We will look at each line of a collection and compare its alignment - # tokens with its neighbors. If it has alignment tokens which do not match - # either neighbor, then we will usually remove them. This will - # simplify later work and improve chances of aligning. + # This is a preliminary step in vertical alignment in which we remove as + # many obviously un-needed alignment tokens as possible. This will prevent + # them from interfering with the final alignment. return unless @{$rlines}; my $has_terminal_match = $rlines->[-1]->get_j_terminal_match(); - # ignore hanging side comments + # ignore hanging side comments in these operations my @filtered = grep { !$_->{_is_hanging_side_comment} } @{$rlines}; my $rnew_lines = \@filtered; my @i_equals; + my @min_levels; + + my $jmax = @{$rnew_lines} - 1; + + my %is_good_tok; - # Step 1: create a hash of tokens for each line + # create a hash of tokens for each line my $rline_hashes = []; foreach my $line ( @{$rnew_lines} ) { my $rhash = {}; my $rtokens = $line->get_rtokens(); my $i = 0; my $i_eq; + my $lev_min; foreach my $tok ( @{$rtokens} ) { - $rhash->{$tok} = [ $i, undef, undef ]; + my ( $raw_tok, $lev, $tag, $tok_count ) = + decode_alignment_token($tok); + if ( !defined($lev_min) || $lev < $lev_min ) { $lev_min = $lev } + + # Possible future upgrade: for multiple matches, + # record [$i1, $i2, ..] instead of $i + $rhash->{$tok} = + [ $i, undef, undef, $raw_tok, $lev, $tag, $tok_count ]; # remember the first equals at line level - if ( !defined($i_eq) && $tok =~ /^=(\d+)/ ) { - my $lev = $1; + if ( !defined($i_eq) && $raw_tok eq '=' ) { if ( $lev eq $group_level ) { $i_eq = $i } } $i++; } push @{$rline_hashes}, $rhash; - push @i_equals, $i_eq; + push @i_equals, $i_eq; + push @min_levels, $lev_min; } - # Step 2: compare each line pair and record matches - for ( my $jl = 0 ; $jl < @{$rline_hashes} - 1 ; $jl++ ) { + # compare each line pair and record matches + my $rtok_hash = {}; + my $nr = 0; + for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) { + my $nl = $nr; + $nr = 0; my $jr = $jl + 1; my $rhash_l = $rline_hashes->[$jl]; my $rhash_r = $rline_hashes->[$jr]; - my $count = 0; + my $count = 0; # UNUSED NOW? my $ntoks = 0; foreach my $tok ( keys %{$rhash_l} ) { $ntoks++; @@ -2373,126 +2417,323 @@ sub delete_unmatched_tokens { my $ir = $rhash_r->{$tok}->[0]; $rhash_l->{$tok}->[2] = $ir; $rhash_r->{$tok}->[1] = $il; + if ( $tok ne '#' ) { + push @{ $rtok_hash->{$tok} }, ( $jl, $jr ); + $nr++; + } } } + + # Set a line break if no matching tokens between these lines + if ( $nr == 0 && $nl > 0 ) { + $rnew_lines->[$jl]->{_end_group} = 1; + } } - # Step 3: remove unmatched tokens - my $jj = 0; - my $jmax = @{$rnew_lines} - 1; - foreach my $line ( @{$rnew_lines} ) { - my $rtokens = $line->get_rtokens(); - my $rhash = $rline_hashes->[$jj]; - my $i = 0; - my $nl = 0; - my $nr = 0; - my $i_eq = $i_equals[$jj]; - my @idel; - my $imax = @{$rtokens} - 2; - - for ( my $i = 0 ; $i <= $imax ; $i++ ) { - my $tok = $rtokens->[$i]; - next if ( $tok eq '#' ); # shouldn't happen - my ( $il, $ir ) = @{ $rhash->{$tok} }[ 1, 2 ]; - $nl++ if defined($il); - $nr++ if defined($ir); - if ( - !defined($il) - && !defined($ir) - && is_deletable_token( $tok, $i, $imax, $jj, $i_eq ) + # find subgroups + my @subgroups; + push @subgroups, [ 0, $jmax ]; + for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) { + if ( $rnew_lines->[$jl]->{_end_group} ) { + $subgroups[-1]->[1] = $jl; + push @subgroups, [ $jl + 1, $jmax ]; + } + } - # Patch: do not touch the first line of a terminal match, - # such as below, because j_terminal has already been set. - # if ($tag) { $tago = "<$tag>"; $tagc = ""; } - # else { $tago = $tagc = ''; } - # But see snippets 'else1.t' and 'else2.t' - && !( $jj == 0 && $has_terminal_match && $jmax == 1 ) + # Loop to process each subgroups + foreach my $item (@subgroups) { + my ( $jbeg, $jend ) = @{$item}; - ) - { - push @idel, $i; + # look for complete ternary or if/elsif/else blocks + my $nlines = $jend - $jbeg + 1; + my %token_line_count; + for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) { + my %seen; + my $line = $rnew_lines->[$jj]; + my $rtokens = $line->get_rtokens(); + foreach my $tok ( @{$rtokens} ) { + if ( !$seen{$tok} ) { + $seen{$tok}++; + $token_line_count{$tok}++; + } + } + } + + # Look for if/else/elsif and ternary blocks + my $is_full_block; + foreach my $tok ( keys %token_line_count ) { + if ( $token_line_count{$tok} == $nlines ) { + if ( $tok =~ /^\?/ || $tok =~ /^\{\d+if/ ) { + $is_full_block = 1; + } } } - if (@idel) { delete_selected_tokens( $line, \@idel ) } + # remove unwanted alignment tokens + for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) { + my $line = $rnew_lines->[$jj]; + my $rtokens = $line->get_rtokens(); + my $rhash = $rline_hashes->[$jj]; + my $i = 0; + my $i_eq = $i_equals[$jj]; + my @idel; + my $imax = @{$rtokens} - 2; + my $delete_above_level; + + for ( my $i = 0 ; $i <= $imax ; $i++ ) { + my $tok = $rtokens->[$i]; + next if ( $tok eq '#' ); # shouldn't happen + my ( $iii, $il, $ir, $raw_tok, $lev, $tag, $tok_count ) = + @{ $rhash->{$tok} }; + + # always remove unmatched tokens + my $delete_me = !defined($il) && !defined($ir); + + # also, if this is a complete ternary or if/elsif/else block, + # remove all alignments which are not also in every line + $delete_me ||= + ( $is_full_block && $token_line_count{$tok} < $nlines ); + + # Remove all tokens above a certain level following a previous + # deletion. For example, we have to remove tagged higher level + # alignment tokens following a => deletion because the tags of + # higher level tokens will now be incorrect. For example, this + # will prevent aligning commas as follows after deleting the + # second => + # $w->insert( + # ListBox => origin => [ 270, 160 ], + # size => [ 200, 55 ], + # ); + if ( defined($delete_above_level) ) { + if ( $lev > $delete_above_level ) { + $delete_me ||= 1; #$tag; + } + else { $delete_above_level = undef } + } + + if ( + $delete_me + && is_deletable_token( $tok, $i, $imax, $jj, $i_eq ) + + # Patch: do not touch the first line of a terminal match, + # such as below, because j_terminal has already been set. + # if ($tag) { $tago = "<$tag>"; $tagc = ""; } + # else { $tago = $tagc = ''; } + # But see snippets 'else1.t' and 'else2.t' + && !( $jj == $jbeg && $has_terminal_match && $nlines == 2 ) + + ) + { + push @idel, $i; + if ( !defined($delete_above_level) + || $lev < $delete_above_level ) + { + + # delete all following higher level alignments + $delete_above_level = $lev; - # set a break if this is an interior line with possible left matches - # but no matches to the right. We do not do this for the last line - # because it could be followed by hanging side comments filtered out - # above. - if ( $nr == 0 && $nl > 0 && $jj < @{$rnew_lines} - 1 ) { - $rnew_lines->[$jj]->{_end_group} = 1; + # but keep deleting after => to next lower level + # to avoid some bizarre alignments + if ( $raw_tok eq '=>' ) { + $delete_above_level = $lev - 1; + } + } + } + } + + if (@idel) { delete_selected_tokens( $line, \@idel ) } } - $jj++; - } + } # End loop over subgroups - #use Data::Dumper; - #print Data::Dumper->Dump( [$rline_hashes] ); return; } -sub decide_if_aligned_pair { +{ # decide_if_aligned_pair - # Do not try to align two lines which are not really similar - return unless ( @group_lines == 2 ); - return if ($is_matching_terminal_line); + my %is_if_or; + my %is_assignment; - my $group_list_type = $group_lines[0]->get_list_type(); + BEGIN { - my $rtokens = $group_lines[0]->get_rtokens(); - my $leading_equals = ( $rtokens->[0] =~ /=/ ); - - # A marginal match is a match which has different patterns. Normally, we - # should not allow exactly two lines to match if marginal. But we will modify - # this rule for two lines with a leading equals-like operator such that we - # match if the patterns to the left of the equals are the same. So for - # example the following two lines are a marginal match but have the same - # left side patterns, so we will align the equals. - # my $orig = my $format = "^<<<<< ~~\n"; - # my $abc = "abc"; - # But these have a different left pattern so they will not be aligned - # $xmldoc .= $`; - # $self->{'leftovers'} .= "get_rpatterns(); - my $rpatterns1 = $group_lines[1]->get_rpatterns(); - my $pat0 = $rpatterns0->[0]; - my $pat1 = $rpatterns1->[0]; - $is_marginal = $pat0 ne $pat1; + my @q = qw( + if or || + ); + @is_if_or{@q} = (1) x scalar(@q); + + @q = qw( + = **= += *= &= <<= &&= + -= /= |= >>= ||= //= + .= %= ^= + x= + ); + @is_assignment{@q} = (1) x scalar(@q); } - my $do_not_align = ( + sub decide_if_aligned_pair { + + # Do not try to align two lines which are not really similar + return unless ( @group_lines == 2 ); + return if ($is_matching_terminal_line); # always align lists - !$group_list_type + my $group_list_type = $group_lines[0]->get_list_type(); + return 0 if ($group_list_type); + + my $jmax0 = $group_lines[0]->get_jmax(); + my $jmax1 = $group_lines[1]->get_jmax(); + my $rtokens = $group_lines[0]->get_rtokens(); + my $leading_equals = ( $rtokens->[0] =~ /=/ ); + + # scan the tokens on the second line + my $rtokens1 = $group_lines[1]->get_rtokens(); + my $saw_if_or; # if we saw an 'if' or 'or' at group level + my $raw_tokb = ""; # first token seen at group level + for ( my $j = 0 ; $j < $jmax1 - 1 ; $j++ ) { + my ( $raw_tok, $lev, $tag, $tok_count ) = + decode_alignment_token( $rtokens1->[$j] ); + if ( $raw_tok && $lev == $group_level ) { + if ( !$raw_tokb ) { $raw_tokb = $raw_tok } + $saw_if_or ||= $is_if_or{$raw_tok}; + } + } - && ( + # A marginal match is a match which has different patterns. Normally, + # we should not allow exactly two lines to match if marginal. But + # we can allow matching in some specific cases. + my $is_marginal = $marginal_match; - # don't align if it was just a marginal match - $is_marginal ##$marginal_match + # lines with differing number of alignment tokens are marginal + $is_marginal ||= + $previous_maximum_jmax_seen != $previous_minimum_jmax_seen + && !$is_assignment{$raw_tokb}; - # don't align two lines with big gap - # NOTE: I am not sure if this test is actually functional any longer - || $group_maximum_gap > 12 + # We will use the line endings to help decide on alignments... + # See if the lines end with semicolons... + my $rpatterns0 = $group_lines[0]->get_rpatterns(); + my $rpatterns1 = $group_lines[1]->get_rpatterns(); + my $sc_term0; + my $sc_term1; + if ( $jmax0 < 1 || $jmax1 < 1 ) { - # or lines with differing number of alignment tokens - || ( $previous_maximum_jmax_seen != $previous_minimum_jmax_seen - && !$leading_equals ) - ) - ); + # shouldn't happen + } + else { + my $pat0 = $rpatterns0->[ $jmax0 - 1 ]; + my $pat1 = $rpatterns1->[ $jmax1 - 1 ]; + $sc_term0 = $pat0 =~ /;b?$/; + $sc_term1 = $pat1 =~ /;b?$/; + } - # But try to convert them into a simple comment group if the first line - # a has side comment - my $rfields = $group_lines[0]->get_rfields(); - my $maximum_field_index = $group_lines[0]->get_jmax(); - if ( $do_not_align - && ( length( $rfields->[$maximum_field_index] ) > 0 ) ) - { - combine_fields(); - $do_not_align = 0; + if ( !$is_marginal && !$sc_term0 ) { + + # First line of assignment should be semicolon terminated. + # For example, do not align here: + # $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} = + # $$href{-NUM_DIRS} = 0; + if ( $is_assignment{$raw_tokb} ) { + $is_marginal = 1; + } + } + + # Try to avoid some undesirable alignments of opening tokens + # for example, the space between grep and { here: + # return map { ( $_ => $_ ) } + # grep { /$handles/ } $self->_get_delegate_method_list; + $is_marginal ||= + ( $raw_tokb eq '(' || $raw_tokb eq '{' ) + && $jmax1 == 2 + && $sc_term0 ne $sc_term1; + + # Undo the marginal match flag in certain cases, + if ($is_marginal) { + + # Two lines with a leading equals-like operator are allowed to + # align if the patterns to the left of the equals are the same. + # For example the following two lines are a marginal match but have + # the same left side patterns, so we will align the equals. + # my $orig = my $format = "^<<<<< ~~\n"; + # my $abc = "abc"; + # But these have a different left pattern so they will not be + # aligned + # $xmldoc .= $`; + # $self->{'leftovers'} .= "[0]; + my $pat1 = $rpatterns1->[0]; + + ########################################################## + # Turn off the marginal flag for some types of assignments + ########################################################## + if ( $is_assignment{$raw_tokb} ) { + + # undo marginal flag if first line is semicolon terminated + # and leading patters match + if ($sc_term0) { # && $sc_term1) { + $is_marginal = $pat0 ne $pat1; + } + } + elsif ( $raw_tokb eq '=>' ) { + + # undo marginal flag if patterns match + $is_marginal = $pat0 ne $pat1; + } + elsif ( $raw_tokb eq '=~' ) { + + # undo marginal flag if both lines are semicolon terminated + # and leading patters match + if ( $sc_term1 && $sc_term0 ) { + $is_marginal = $pat0 ne $pat1; + } + } + + ###################################################### + # Turn off the marginal flag if we saw an 'if' or 'or' + ###################################################### + + # A trailing 'if' and 'or' often gives a good alignment + # For example, we can align these: + # return -1 if $_[0] =~ m/^CHAPT|APPENDIX/; + # return $1 + 0 if $_[0] =~ m/^SECT(\d*)$/; + + # or + # $d_in_m[2] = 29 if ( &Date_LeapYear($y) ); + # $d = $d_in_m[$m] if ( $d > $d_in_m[$m] ); + + if ($saw_if_or) { + + # undo marginal flag if both lines are semicolon terminated + if ( $sc_term0 && $sc_term1 ) { + $is_marginal = 0; + } + } + } + + ############################### + # Set the return flag: + # Don't align if still marginal + ############################### + my $do_not_align = $is_marginal; + + # But try to convert them into a simple comment group if the first line + # a has side comment + my $rfields = $group_lines[0]->get_rfields(); + my $maximum_field_index = $group_lines[0]->get_jmax(); + if ( $do_not_align + && ( length( $rfields->[$maximum_field_index] ) > 0 ) ) + { + combine_fields(); + $do_not_align = 0; + } + return $do_not_align; } - return $do_not_align; } sub adjust_side_comment { @@ -2959,7 +3200,9 @@ sub valign_output_step_B { my @seqno_last = ( split /:/, $last_nonblank_seqno_string ); my @seqno_now = ( split /:/, $seqno_string ); - if ( $seqno_now[-1] == $seqno_last[0] + if ( @seqno_now + && @seqno_last + && $seqno_now[-1] == $seqno_last[0] && $seqno_now[0] == $seqno_last[-1] ) {