From 318c006b49e5611a53600ac1f3534f6856d20644 Mon Sep 17 00:00:00 2001 From: Steve Hancock Date: Sat, 18 Jul 2020 17:00:35 -0700 Subject: [PATCH] add tail-wag-dog rule to vertical alignment --- lib/Perl/Tidy/VerticalAligner.pm | 307 +++++++++++++++---------------- 1 file changed, 147 insertions(+), 160 deletions(-) diff --git a/lib/Perl/Tidy/VerticalAligner.pm b/lib/Perl/Tidy/VerticalAligner.pm index 46af23b6..a1ce87b1 100644 --- a/lib/Perl/Tidy/VerticalAligner.pm +++ b/lib/Perl/Tidy/VerticalAligner.pm @@ -1037,6 +1037,7 @@ sub fix_terminal_else { { # sub check_match my %is_good_alignment; + my $EXPLAIN; BEGIN { @@ -1045,6 +1046,8 @@ sub fix_terminal_else { my @q = qw( { ? => = ); push @q, (','); @is_good_alignment{@q} = (1) x scalar(@q); + + $EXPLAIN = 0; } sub check_match { @@ -1060,9 +1063,9 @@ sub fix_terminal_else { my $jmax = $new_line->get_jmax(); my $maximum_field_index = $old_line->get_jmax(); - # Variable $imax_align will be set to indicate the maximum token index - # to be matched in the left-to-right sweep, in the case that this line - # does not exactly match the current group. + # Variable $imax_align will be set to indicate the maximum token index + # to be matched in the left-to-right sweep, in the case that this line + # does not exactly match the current group. my $imax_align = -1; # variable $GoToLoc explains reason for no match, for debugging @@ -1138,7 +1141,7 @@ sub fix_terminal_else { # Everything up to the first digit is the actual token. my ( $alignment_token, $lev, $tag, $tok_count ) = - decode_alignment_token($new_tok); + decode_alignment_token($new_tok); # see if the decorated tokens match my $tokens_match = $new_tok eq $old_tok @@ -1262,9 +1265,8 @@ sub fix_terminal_else { } # Everything matches so far, so we can update the maximum index - # for partial alignment. We can avoid some poor alignments if - # we just align to tokens at group level. - $imax_align = $j if ($lev == $group_level); + # for partial alignment. + $imax_align = $j; } ## end for my $j ( 0 .. $jlimit) @@ -1286,24 +1288,29 @@ sub fix_terminal_else { { $marginal_match = 0; } + ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n"; } # The tokens match, but the lines must have identical number of # tokens to join the group. if ( $maximum_field_index != $jmax ) { - $GoToLoc = "token count differs"; - $imax_align = $jmax - 2; + $GoToLoc = "token count differs"; goto NO_MATCH; } - #print "match, imax_align=$imax_align, jmax=$jmax\n"; - return ($imax_align); + $EXPLAIN && print "match, imax_align=$imax_align, jmax=$jmax\n"; + + # The tokens match. Now See if there is space for this line in the + # current group. + check_fit( $new_line, $old_line, $jlimit ); + + return; NO_MATCH: # variable $GoToLoc is for debugging -##print "no match because $GoToLoc, flag=$imax_align\n"; + $EXPLAIN && print "no match because $GoToLoc, flag=$imax_align\n"; end_rgroup($imax_align); return; @@ -1315,33 +1322,34 @@ sub check_fit { my ( $new_line, $old_line, $imax_align ) = @_; return unless (@group_lines); - my $jmax = $new_line->get_jmax(); - my $leading_space_count = $new_line->get_leading_space_count(); - my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); - my $rtokens = $new_line->get_rtokens(); - my $rfields = $new_line->get_rfields(); - my $rfield_lengths = $new_line->get_rfield_lengths(); - my $rpatterns = $new_line->get_rpatterns(); + # The new line has alignments identical to the current group. Now we have + # to see if the new line can fit into the group without causing a field + # to exceed the line length limit. If it cannot, we will end the current + # group and start a new one. + + my $jmax = $new_line->get_jmax(); + my $leading_space_count = $new_line->get_leading_space_count(); + my $rfield_lengths = $new_line->get_rfield_lengths(); my $group_list_type = $group_lines[0]->get_list_type(); my $padding_so_far = 0; my $padding_available = $old_line->get_available_space_on_right(); - # save current columns in case this doesn't work + # Save current columns in case this line does not fit. save_alignment_columns(); + # Loop over all alignments ... my $maximum_field_index = $old_line->get_jmax(); for my $j ( 0 .. $jmax ) { - ##my $pad = length( $rfields->[$j] ) - $old_line->current_field_width($j); my $pad = $rfield_lengths->[$j] - $old_line->current_field_width($j); if ( $j == 0 ) { $pad += $leading_space_count; } - # remember largest gap of the group, excluding gap to side comment + # Remember largest gap of the group, excluding gap to side comment. if ( $pad < 0 && $group_maximum_gap < -$pad && $j > 0 @@ -1350,90 +1358,19 @@ sub check_fit { $group_maximum_gap = -$pad; } + # Keep going if this field does not need any space. next if $pad < 0; - ## OLD NOTES: - ## This patch helps sometimes, but it doesn't check to see if - ## the line is too long even without the side comment. It needs - ## to be reworked. - ##don't let a long token with no trailing side comment push - ##side comments out, or end a group. (sidecmt1.t) - ##next if ($j==$jmax-1 && length($rfields->[$jmax])==0); - - # BEGIN PATCH for keith1.txt. - # If the group began matching multiple tokens but later this got - # reduced to a fewer number of matching tokens, then the fields - # of the later lines will still have to fit into their corresponding - # fields. So a large later field will "push" the other fields to - # the right, including previous side comments, and if there is no room - # then there is no match. - # For example, look at the last line in the following snippet: - - # my $b_prod_db = ( $ENV{ORACLE_SID} =~ m/p$/ && !$testing ) ? true : false; - # my $env = ($b_prod_db) ? "prd" : "val"; - # my $plant = ( $OPT{p} ) ? $OPT{p} : "STL"; - # my $task = $OPT{t}; - # my $fnam = "longggggggggggggggg.$record_created.$env.$plant.idash"; - - # The long term will push the '?' to the right to fit in, and in this - # case there is not enough room so it will not match the equals unless - # we do something special. - - # Usually it looks good to keep an initial alignment of '=' going, and - # we can do this if the long term can fit in the space taken up by the - # remaining fields (the ? : fields here). - - # Allowing any matching token for now, but it could be restricted - # to an '='-like token if necessary. - - if ( - $pad > $padding_available - && $jmax == 2 # matching one thing (plus #) - && $j == $jmax - 1 # at last field - && @group_lines > 1 # more than 1 line in group now - && $jmax < $maximum_field_index # other lines have more fields - && $rfield_lengths->[$jmax] == 0 # no side comment - - # Uncomment to match only equals (but this does not seem necessary) - # && $rtokens->[0] =~ /^=\d/ # matching an equals - ) - { - my $extra_padding = 0; - foreach my $jj ( $j + 1 .. $maximum_field_index - 1 ) { - $extra_padding += $old_line->current_field_width($jj); - } - - next if ( $pad <= $padding_available + $extra_padding ); - } - - # END PATCH for keith1.pl - - # This line will need space; lets see if we want to accept it.. - if ( - - # not if this won't fit - ( $pad > $padding_available ) - - # previously, there were upper bounds placed on padding here - # (maximum_whitespace_columns), but they were not really helpful + # See if it needs too much space. + if ( $pad > $padding_available ) { - ) - { - - # revert to starting state then flush; things didn't work out + # Not enough room for it; revert to starting state then flush. restore_alignment_columns(); end_rgroup($imax_align); last; } - # patch to avoid excessive gaps in previous lines, - # due to a line of fewer fields. - # return join( ".", - # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"}, - # $self->{"area"}, $self->{"id"}, $self->{"sel"} ); - next if ( $jmax < $maximum_field_index && $j == $jmax - 1 ); - - # looks ok, squeeze this field in + # This line fits, squeeze it in. $old_line->increase_field_width( $j, $pad ); $padding_available -= $pad; @@ -1749,7 +1686,7 @@ sub my_flush { # Undo alignment of some poor two-line combinations. # We had to wait until now to know the line count. - decide_if_aligned_pair(); + decide_if_aligned_pair($imax_align); $rgroups->[-1]->[2] = $imax_align; @@ -1828,17 +1765,11 @@ sub sweep_top_to_bottom { end_rgroup(-1) unless ( $side_comment && $prev_comment ); } - # ------------------------------------------------------------- - # Flush previous group unless all common tokens and patterns - # match.. - my $imax_align = check_match( $new_line, $base_line ); - - # ------------------------------------------------------------- - # See if there is space for this line in the current group (if - # any) - # ------------------------------------------------------------- - check_fit( $new_line, $base_line, $imax_align ) if (@group_lines); + # See if the new line matches and fits the current group. + # Flush the current group if not. + check_match( $new_line, $base_line ); + # Store the new line add_to_rgroup( $new_line, $jline ); if ( defined($j_terminal_match) ) { @@ -1921,6 +1852,9 @@ sub sweep_left_to_right { # Hash to hold the maximum alignment change for any group my %max_move; + # a small number of columns + my $short_pad = 4; + my $ng = -1; foreach my $item ( @{$rgroups} ) { $ng++; @@ -1935,8 +1869,9 @@ sub sweep_left_to_right { $jbeg_m = $jbeg; $jend_m = $jend; - # Get values for this group. Note that we just have to use values for - # one of the lines of the group since all members have the same alignments. + # Get values for this group. Note that we just have to use values for + # one of the lines of the group since all members have the same + # alignments. ( $jbeg, $jend, $istop ) = @{$item}; $line = $rlines->[$jbeg]; @@ -1973,8 +1908,8 @@ sub sweep_left_to_right { # is a compromise to keep some vertical alignment but prevent large # gaps, which do not look good for just two lines. my $ng_m = $ng - 1; - $max_move{"$ng_m"} = $rOpts_indent_columns; - $max_move{"$ng"} = $rOpts_indent_columns; + $max_move{"$ng_m"} = $short_pad; + $max_move{"$ng"} = $short_pad; } # Loop to find all common leading tokens. @@ -2019,12 +1954,12 @@ sub sweep_left_to_right { ############################### # Step 3: Execute the task list ############################### - do_left_to_right_sweep( $rlines, $rgroups, \@todo, \%max_move ); + do_left_to_right_sweep( $rlines, $rgroups, \@todo, \%max_move, $short_pad ); return; } sub do_left_to_right_sweep { - my ( $rlines, $rgroups, $rtodo, $rmax_move ) = @_; + my ( $rlines, $rgroups, $rtodo, $rmax_move, $short_pad ) = @_; my $move_to_common_column = sub { @@ -2045,9 +1980,9 @@ sub do_left_to_right_sweep { $line->increase_field_width( $itok, $move ); } - # Note that we continue on even if the move would have been - # negative. We could also throw a switch to stop at this point, - # but if we keep going we may get some additional alignments. + # Note that we continue on even if the move would have been + # negative. We could also throw a switch to stop at this point, + # but if we keep going we may get some additional alignments. # So there may be jumps in aligned/non-aligned tokens when # we are running out of space, but it does not seem to look # any worse than stopping altogether. @@ -2063,10 +1998,14 @@ sub do_left_to_right_sweep { my $ng_first; # index of the first group of a continuous sequence my $col_want; # the common alignment column of a sequence of groups my $col_limit; # maximum column before bumping into max line length + my $line_count_ng_m = 0; + my $jmax_m; + my $istop_m; # Loop over the groups foreach my $ng ( $ng_beg .. $ng_end ) { - my ( $jbeg, $jend ) = @{ $rgroups->[$ng] }; + my ( $jbeg, $jend, $istop ) = @{ $rgroups->[$ng] }; + my $line_count_ng = $jend - $jbeg + 1; # Important: note that since all lines in a group have a common # alignments object, we just have to work on one of the lines (the @@ -2075,28 +2014,63 @@ sub do_left_to_right_sweep { my $jmax = $line->get_jmax(); # the maximum space without exceeding the line length: - my $col = $line->get_column($itok); my $avail = $line->get_available_space_on_right(); + my $col = $line->get_column($itok); my $col_max = $col + $avail; # Initialize on first group if ( !defined($col_want) ) { - $ng_first = $ng; - $col_want = $col; - $col_limit = $col_max; + $ng_first = $ng; + $col_want = $col; + $col_limit = $col_max; + $line_count_ng_m = $line_count_ng; + $jmax_m = $jmax; + $istop_m = $istop; next; } + # RULE: prevent a 'tail-wag-dog' syndrom: + # Do not let one or two lines with a different number of alignments + # open up a big gap in a large block. For example, we will prevent + # something like this, where the first line prys open the rest: + + # $worksheet->write( "B7", "http://www.perl.com", undef, $format ); + # $worksheet->write( "C7", "", $format ); + # $worksheet->write( "D7", "", $format ); + # $worksheet->write( "D8", "", $format ); + # $worksheet->write( "D8", "", $format ); + + # We should exclude from consideration two groups which are + # effectively the same but separated because one does not + # fit in the maximum allowed line length. + my $is_same_group = $jmax == $jmax_m && $istop_m == $jmax_m - 2; + my $is_big_gap; + if ( !$is_same_group ) { + $is_big_gap ||= + $line_count_ng >= 4 + && $line_count_ng_m <= 2 + && $col_want > $col + $short_pad; + $is_big_gap ||= + $line_count_ng_m >= 4 + && $line_count_ng <= 2 + && $col > $col_want + $short_pad; + } + # quit and restart if it cannot join this batch - if ( $col_want > $col_max || $col > $col_limit ) { + if ( $col_want > $col_max || $col > $col_limit || $is_big_gap ) { $move_to_common_column->( $ng_first, $ng - 1, $itok, $col_want ); - $ng_first = $ng; - $col_want = $col; - $col_limit = $col_max; + $ng_first = $ng; + $col_want = $col; + $col_limit = $col_max; + $line_count_ng_m = $line_count_ng; + $jmax_m = $jmax; + $istop_m = $istop; next; } + $line_count_ng_m += $line_count_ng; + # update the common column and limit if ( $col > $col_want ) { $col_want = $col } if ( $col_max < $col_limit ) { $col_limit = $col_max } @@ -2551,7 +2525,6 @@ sub delete_unmatched_tokens { ) { -##print "deleting token $i tok=$tok\n"; push @idel, $i; if ( !defined($delete_above_level) || $lev < $delete_above_level ) @@ -3088,7 +3061,7 @@ sub prune_alignment_tree { # $deep1 ~~ $deep1; # So we will use two thresholds. - my $nmin_mono = $depth + 3; #TODO: test with 2 + my $nmin_mono = $depth + 3; #TODO: test with 2 my $nmin_non_mono = $depth + 6; if ( $nmin_mono > $nlines_p - 1 ) { $nmin_mono = $nlines_p - 1; @@ -3195,6 +3168,8 @@ sub Dump_tree_groups { sub decide_if_aligned_pair { + my ($imax_align) = @_; + # Do not try to align two lines which are not really similar return unless ( @group_lines == 2 ); return if ($is_matching_terminal_line); @@ -3203,8 +3178,8 @@ sub Dump_tree_groups { my $group_list_type = $group_lines[0]->get_list_type(); return 0 if ($group_list_type); - my $jmax0 = $group_lines[0]->get_jmax(); - my $jmax1 = $group_lines[1]->get_jmax(); + my $jmax0 = $group_lines[0]->get_jmax(); + my $jmax1 = $group_lines[1]->get_jmax(); my $rtokens = $group_lines[0]->get_rtokens(); my $leading_equals = ( $rtokens->[0] =~ /=/ ); @@ -3341,8 +3316,8 @@ sub Dump_tree_groups { } # Remove the alignments if still marginal - if ( $is_marginal ) { combine_fields() } - return; + if ($is_marginal) { combine_fields($imax_align) } + return; } } @@ -3783,41 +3758,52 @@ sub get_extra_leading_spaces { sub combine_fields { - # combine all fields except for the comment field ( sidecmt.t ) + # We have a group of two lines for which we do not want to align tokens + # between index $imax_align and the side comment. So we will delete fields + # between $imax_align and the side comment. Alignments have already + # been set so we have to adjust them. + + my ($imax_align) = @_; + if ( !defined($imax_align) ) { $imax_align = -1 } + + # Correction: although this routine has the ability to retain some leading + # alignments, overall the results are much better if we always remove all + # of the alignments. Here is an example of the problem if we do not + # do this. The first two lines are marginal but match their =~ matches + # the third line. But if we keep it we get a big gap: + # return $path unless $path =~ /^~/; + # $path =~ s:^~([^/]+):(getpwnam($1))[$[+7]:e; + # $path =~ s:^~:$ENV{'HOME'} || (getpwuid($<))[$[+7]:e; + $imax_align = -1; + # Uses global variables: # @group_lines - # FIXME: also need to fix patterns and tokens, and allow variable jmax - my $maximum_field_index = $group_lines[0]->get_jmax(); - foreach my $line (@group_lines) { - my $rfields = $line->get_rfields(); - my $rfield_lengths = $line->get_rfield_lengths(); - foreach ( 1 .. $maximum_field_index - 1 ) { - $rfields->[0] .= $rfields->[$_]; - $rfield_lengths->[0] += $rfield_lengths->[$_]; - } - $rfields->[1] = $rfields->[$maximum_field_index]; - $rfield_lengths->[1] = $rfield_lengths->[$maximum_field_index]; - $line->set_jmax(1); - $line->set_column( 0, 0 ); - $line->set_column( 1, 0 ); + # First delete the unwanted tokens + my $jmax_old = $group_lines[0]->get_jmax(); + my @old_alignments = $group_lines[0]->get_alignments(); + my @idel = ( $imax_align + 1 .. $jmax_old - 2 ); - } - $maximum_field_index = 1; + return unless (@idel); foreach my $line (@group_lines) { - my $rfields = $line->get_rfields(); - my $rfield_lengths = $line->get_rfield_lengths(); - for my $k ( 0 .. $maximum_field_index ) { - my $pad = $rfield_lengths->[$k] - $line->current_field_width($k); - if ( $k == 0 ) { - $pad += $line->get_leading_space_count(); - } - - if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) } + delete_selected_tokens( $line, \@idel ); + } - } + # Now adjust the alignments. Note that the side comment alignment + # is always at jmax-1, and there is an ending alignment at jmax. + my @new_alignments; + if ( $imax_align >= 0 ) { + @new_alignments[ 0 .. $imax_align ] = + @old_alignments[ 0 .. $imax_align ]; } + + my $jmax_new = $group_lines[0]->get_jmax(); + + $new_alignments[ $jmax_new - 1 ] = $old_alignments[ $jmax_old - 1 ]; + $new_alignments[$jmax_new] = $old_alignments[$jmax_old]; + $group_lines[0]->set_alignments(@new_alignments); + $group_lines[1]->set_alignments(@new_alignments); return; } @@ -4341,3 +4327,4 @@ sub report_anything_unusual { return; } 1; + -- 2.39.5