{ # sub check_match
my %is_good_alignment;
+ my $EXPLAIN;
BEGIN {
my @q = qw( { ? => = );
push @q, (',');
@is_good_alignment{@q} = (1) x scalar(@q);
+
+ $EXPLAIN = 0;
}
sub check_match {
my $jmax = $new_line->get_jmax();
my $maximum_field_index = $old_line->get_jmax();
- # Variable $imax_align will be set to indicate the maximum token index
- # to be matched in the left-to-right sweep, in the case that this line
- # does not exactly match the current group.
+ # Variable $imax_align will be set to indicate the maximum token index
+ # to be matched in the left-to-right sweep, in the case that this line
+ # does not exactly match the current group.
my $imax_align = -1;
# variable $GoToLoc explains reason for no match, for debugging
# Everything up to the first digit is the actual token.
my ( $alignment_token, $lev, $tag, $tok_count ) =
- decode_alignment_token($new_tok);
+ decode_alignment_token($new_tok);
# see if the decorated tokens match
my $tokens_match = $new_tok eq $old_tok
}
# Everything matches so far, so we can update the maximum index
- # for partial alignment. We can avoid some poor alignments if
- # we just align to tokens at group level.
- $imax_align = $j if ($lev == $group_level);
+ # for partial alignment.
+ $imax_align = $j;
} ## end for my $j ( 0 .. $jlimit)
{
$marginal_match = 0;
}
+
##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n";
}
# The tokens match, but the lines must have identical number of
# tokens to join the group.
if ( $maximum_field_index != $jmax ) {
- $GoToLoc = "token count differs";
- $imax_align = $jmax - 2;
+ $GoToLoc = "token count differs";
goto NO_MATCH;
}
- #print "match, imax_align=$imax_align, jmax=$jmax\n";
- return ($imax_align);
+ $EXPLAIN && print "match, imax_align=$imax_align, jmax=$jmax\n";
+
+ # The tokens match. Now See if there is space for this line in the
+ # current group.
+ check_fit( $new_line, $old_line, $jlimit );
+
+ return;
NO_MATCH:
# variable $GoToLoc is for debugging
-##print "no match because $GoToLoc, flag=$imax_align\n";
+ $EXPLAIN && print "no match because $GoToLoc, flag=$imax_align\n";
end_rgroup($imax_align);
return;
my ( $new_line, $old_line, $imax_align ) = @_;
return unless (@group_lines);
- my $jmax = $new_line->get_jmax();
- my $leading_space_count = $new_line->get_leading_space_count();
- my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
- my $rtokens = $new_line->get_rtokens();
- my $rfields = $new_line->get_rfields();
- my $rfield_lengths = $new_line->get_rfield_lengths();
- my $rpatterns = $new_line->get_rpatterns();
+ # The new line has alignments identical to the current group. Now we have
+ # to see if the new line can fit into the group without causing a field
+ # to exceed the line length limit. If it cannot, we will end the current
+ # group and start a new one.
+
+ my $jmax = $new_line->get_jmax();
+ my $leading_space_count = $new_line->get_leading_space_count();
+ my $rfield_lengths = $new_line->get_rfield_lengths();
my $group_list_type = $group_lines[0]->get_list_type();
my $padding_so_far = 0;
my $padding_available = $old_line->get_available_space_on_right();
- # save current columns in case this doesn't work
+ # Save current columns in case this line does not fit.
save_alignment_columns();
+ # Loop over all alignments ...
my $maximum_field_index = $old_line->get_jmax();
for my $j ( 0 .. $jmax ) {
- ##my $pad = length( $rfields->[$j] ) - $old_line->current_field_width($j);
my $pad = $rfield_lengths->[$j] - $old_line->current_field_width($j);
if ( $j == 0 ) {
$pad += $leading_space_count;
}
- # remember largest gap of the group, excluding gap to side comment
+ # Remember largest gap of the group, excluding gap to side comment.
if ( $pad < 0
&& $group_maximum_gap < -$pad
&& $j > 0
$group_maximum_gap = -$pad;
}
+ # Keep going if this field does not need any space.
next if $pad < 0;
- ## OLD NOTES:
- ## This patch helps sometimes, but it doesn't check to see if
- ## the line is too long even without the side comment. It needs
- ## to be reworked.
- ##don't let a long token with no trailing side comment push
- ##side comments out, or end a group. (sidecmt1.t)
- ##next if ($j==$jmax-1 && length($rfields->[$jmax])==0);
-
- # BEGIN PATCH for keith1.txt.
- # If the group began matching multiple tokens but later this got
- # reduced to a fewer number of matching tokens, then the fields
- # of the later lines will still have to fit into their corresponding
- # fields. So a large later field will "push" the other fields to
- # the right, including previous side comments, and if there is no room
- # then there is no match.
- # For example, look at the last line in the following snippet:
-
- # my $b_prod_db = ( $ENV{ORACLE_SID} =~ m/p$/ && !$testing ) ? true : false;
- # my $env = ($b_prod_db) ? "prd" : "val";
- # my $plant = ( $OPT{p} ) ? $OPT{p} : "STL";
- # my $task = $OPT{t};
- # my $fnam = "longggggggggggggggg.$record_created.$env.$plant.idash";
-
- # The long term will push the '?' to the right to fit in, and in this
- # case there is not enough room so it will not match the equals unless
- # we do something special.
-
- # Usually it looks good to keep an initial alignment of '=' going, and
- # we can do this if the long term can fit in the space taken up by the
- # remaining fields (the ? : fields here).
-
- # Allowing any matching token for now, but it could be restricted
- # to an '='-like token if necessary.
-
- if (
- $pad > $padding_available
- && $jmax == 2 # matching one thing (plus #)
- && $j == $jmax - 1 # at last field
- && @group_lines > 1 # more than 1 line in group now
- && $jmax < $maximum_field_index # other lines have more fields
- && $rfield_lengths->[$jmax] == 0 # no side comment
-
- # Uncomment to match only equals (but this does not seem necessary)
- # && $rtokens->[0] =~ /^=\d/ # matching an equals
- )
- {
- my $extra_padding = 0;
- foreach my $jj ( $j + 1 .. $maximum_field_index - 1 ) {
- $extra_padding += $old_line->current_field_width($jj);
- }
-
- next if ( $pad <= $padding_available + $extra_padding );
- }
-
- # END PATCH for keith1.pl
-
- # This line will need space; lets see if we want to accept it..
- if (
-
- # not if this won't fit
- ( $pad > $padding_available )
-
- # previously, there were upper bounds placed on padding here
- # (maximum_whitespace_columns), but they were not really helpful
+ # See if it needs too much space.
+ if ( $pad > $padding_available ) {
- )
- {
-
- # revert to starting state then flush; things didn't work out
+ # Not enough room for it; revert to starting state then flush.
restore_alignment_columns();
end_rgroup($imax_align);
last;
}
- # patch to avoid excessive gaps in previous lines,
- # due to a line of fewer fields.
- # return join( ".",
- # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"},
- # $self->{"area"}, $self->{"id"}, $self->{"sel"} );
- next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
-
- # looks ok, squeeze this field in
+ # This line fits, squeeze it in.
$old_line->increase_field_width( $j, $pad );
$padding_available -= $pad;
# Undo alignment of some poor two-line combinations.
# We had to wait until now to know the line count.
- decide_if_aligned_pair();
+ decide_if_aligned_pair($imax_align);
$rgroups->[-1]->[2] = $imax_align;
end_rgroup(-1) unless ( $side_comment && $prev_comment );
}
- # -------------------------------------------------------------
- # Flush previous group unless all common tokens and patterns
- # match..
- my $imax_align = check_match( $new_line, $base_line );
-
- # -------------------------------------------------------------
- # See if there is space for this line in the current group (if
- # any)
- # -------------------------------------------------------------
- check_fit( $new_line, $base_line, $imax_align ) if (@group_lines);
+ # See if the new line matches and fits the current group.
+ # Flush the current group if not.
+ check_match( $new_line, $base_line );
+ # Store the new line
add_to_rgroup( $new_line, $jline );
if ( defined($j_terminal_match) ) {
# Hash to hold the maximum alignment change for any group
my %max_move;
+ # a small number of columns
+ my $short_pad = 4;
+
my $ng = -1;
foreach my $item ( @{$rgroups} ) {
$ng++;
$jbeg_m = $jbeg;
$jend_m = $jend;
- # Get values for this group. Note that we just have to use values for
- # one of the lines of the group since all members have the same alignments.
+ # Get values for this group. Note that we just have to use values for
+ # one of the lines of the group since all members have the same
+ # alignments.
( $jbeg, $jend, $istop ) = @{$item};
$line = $rlines->[$jbeg];
# is a compromise to keep some vertical alignment but prevent large
# gaps, which do not look good for just two lines.
my $ng_m = $ng - 1;
- $max_move{"$ng_m"} = $rOpts_indent_columns;
- $max_move{"$ng"} = $rOpts_indent_columns;
+ $max_move{"$ng_m"} = $short_pad;
+ $max_move{"$ng"} = $short_pad;
}
# Loop to find all common leading tokens.
###############################
# Step 3: Execute the task list
###############################
- do_left_to_right_sweep( $rlines, $rgroups, \@todo, \%max_move );
+ do_left_to_right_sweep( $rlines, $rgroups, \@todo, \%max_move, $short_pad );
return;
}
sub do_left_to_right_sweep {
- my ( $rlines, $rgroups, $rtodo, $rmax_move ) = @_;
+ my ( $rlines, $rgroups, $rtodo, $rmax_move, $short_pad ) = @_;
my $move_to_common_column = sub {
$line->increase_field_width( $itok, $move );
}
- # Note that we continue on even if the move would have been
- # negative. We could also throw a switch to stop at this point,
- # but if we keep going we may get some additional alignments.
+ # Note that we continue on even if the move would have been
+ # negative. We could also throw a switch to stop at this point,
+ # but if we keep going we may get some additional alignments.
# So there may be jumps in aligned/non-aligned tokens when
# we are running out of space, but it does not seem to look
# any worse than stopping altogether.
my $ng_first; # index of the first group of a continuous sequence
my $col_want; # the common alignment column of a sequence of groups
my $col_limit; # maximum column before bumping into max line length
+ my $line_count_ng_m = 0;
+ my $jmax_m;
+ my $istop_m;
# Loop over the groups
foreach my $ng ( $ng_beg .. $ng_end ) {
- my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
+ my ( $jbeg, $jend, $istop ) = @{ $rgroups->[$ng] };
+ my $line_count_ng = $jend - $jbeg + 1;
# Important: note that since all lines in a group have a common
# alignments object, we just have to work on one of the lines (the
my $jmax = $line->get_jmax();
# the maximum space without exceeding the line length:
- my $col = $line->get_column($itok);
my $avail = $line->get_available_space_on_right();
+ my $col = $line->get_column($itok);
my $col_max = $col + $avail;
# Initialize on first group
if ( !defined($col_want) ) {
- $ng_first = $ng;
- $col_want = $col;
- $col_limit = $col_max;
+ $ng_first = $ng;
+ $col_want = $col;
+ $col_limit = $col_max;
+ $line_count_ng_m = $line_count_ng;
+ $jmax_m = $jmax;
+ $istop_m = $istop;
next;
}
+ # RULE: prevent a 'tail-wag-dog' syndrom:
+ # Do not let one or two lines with a different number of alignments
+ # open up a big gap in a large block. For example, we will prevent
+ # something like this, where the first line prys open the rest:
+
+ # $worksheet->write( "B7", "http://www.perl.com", undef, $format );
+ # $worksheet->write( "C7", "", $format );
+ # $worksheet->write( "D7", "", $format );
+ # $worksheet->write( "D8", "", $format );
+ # $worksheet->write( "D8", "", $format );
+
+ # We should exclude from consideration two groups which are
+ # effectively the same but separated because one does not
+ # fit in the maximum allowed line length.
+ my $is_same_group = $jmax == $jmax_m && $istop_m == $jmax_m - 2;
+ my $is_big_gap;
+ if ( !$is_same_group ) {
+ $is_big_gap ||=
+ $line_count_ng >= 4
+ && $line_count_ng_m <= 2
+ && $col_want > $col + $short_pad;
+ $is_big_gap ||=
+ $line_count_ng_m >= 4
+ && $line_count_ng <= 2
+ && $col > $col_want + $short_pad;
+ }
+
# quit and restart if it cannot join this batch
- if ( $col_want > $col_max || $col > $col_limit ) {
+ if ( $col_want > $col_max || $col > $col_limit || $is_big_gap ) {
$move_to_common_column->( $ng_first, $ng - 1, $itok,
$col_want );
- $ng_first = $ng;
- $col_want = $col;
- $col_limit = $col_max;
+ $ng_first = $ng;
+ $col_want = $col;
+ $col_limit = $col_max;
+ $line_count_ng_m = $line_count_ng;
+ $jmax_m = $jmax;
+ $istop_m = $istop;
next;
}
+ $line_count_ng_m += $line_count_ng;
+
# update the common column and limit
if ( $col > $col_want ) { $col_want = $col }
if ( $col_max < $col_limit ) { $col_limit = $col_max }
)
{
-##print "deleting token $i tok=$tok\n";
push @idel, $i;
if ( !defined($delete_above_level)
|| $lev < $delete_above_level )
# $deep1 ~~ $deep1;
# So we will use two thresholds.
- my $nmin_mono = $depth + 3; #TODO: test with 2
+ my $nmin_mono = $depth + 3; #TODO: test with 2
my $nmin_non_mono = $depth + 6;
if ( $nmin_mono > $nlines_p - 1 ) {
$nmin_mono = $nlines_p - 1;
sub decide_if_aligned_pair {
+ my ($imax_align) = @_;
+
# Do not try to align two lines which are not really similar
return unless ( @group_lines == 2 );
return if ($is_matching_terminal_line);
my $group_list_type = $group_lines[0]->get_list_type();
return 0 if ($group_list_type);
- my $jmax0 = $group_lines[0]->get_jmax();
- my $jmax1 = $group_lines[1]->get_jmax();
+ my $jmax0 = $group_lines[0]->get_jmax();
+ my $jmax1 = $group_lines[1]->get_jmax();
my $rtokens = $group_lines[0]->get_rtokens();
my $leading_equals = ( $rtokens->[0] =~ /=/ );
}
# Remove the alignments if still marginal
- if ( $is_marginal ) { combine_fields() }
- return;
+ if ($is_marginal) { combine_fields($imax_align) }
+ return;
}
}
sub combine_fields {
- # combine all fields except for the comment field ( sidecmt.t )
+ # We have a group of two lines for which we do not want to align tokens
+ # between index $imax_align and the side comment. So we will delete fields
+ # between $imax_align and the side comment. Alignments have already
+ # been set so we have to adjust them.
+
+ my ($imax_align) = @_;
+ if ( !defined($imax_align) ) { $imax_align = -1 }
+
+ # Correction: although this routine has the ability to retain some leading
+ # alignments, overall the results are much better if we always remove all
+ # of the alignments. Here is an example of the problem if we do not
+ # do this. The first two lines are marginal but match their =~ matches
+ # the third line. But if we keep it we get a big gap:
+ # return $path unless $path =~ /^~/;
+ # $path =~ s:^~([^/]+):(getpwnam($1))[$[+7]:e;
+ # $path =~ s:^~:$ENV{'HOME'} || (getpwuid($<))[$[+7]:e;
+ $imax_align = -1;
+
# Uses global variables:
# @group_lines
- # FIXME: also need to fix patterns and tokens, and allow variable jmax
- my $maximum_field_index = $group_lines[0]->get_jmax();
- foreach my $line (@group_lines) {
- my $rfields = $line->get_rfields();
- my $rfield_lengths = $line->get_rfield_lengths();
- foreach ( 1 .. $maximum_field_index - 1 ) {
- $rfields->[0] .= $rfields->[$_];
- $rfield_lengths->[0] += $rfield_lengths->[$_];
- }
- $rfields->[1] = $rfields->[$maximum_field_index];
- $rfield_lengths->[1] = $rfield_lengths->[$maximum_field_index];
- $line->set_jmax(1);
- $line->set_column( 0, 0 );
- $line->set_column( 1, 0 );
+ # First delete the unwanted tokens
+ my $jmax_old = $group_lines[0]->get_jmax();
+ my @old_alignments = $group_lines[0]->get_alignments();
+ my @idel = ( $imax_align + 1 .. $jmax_old - 2 );
- }
- $maximum_field_index = 1;
+ return unless (@idel);
foreach my $line (@group_lines) {
- my $rfields = $line->get_rfields();
- my $rfield_lengths = $line->get_rfield_lengths();
- for my $k ( 0 .. $maximum_field_index ) {
- my $pad = $rfield_lengths->[$k] - $line->current_field_width($k);
- if ( $k == 0 ) {
- $pad += $line->get_leading_space_count();
- }
-
- if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) }
+ delete_selected_tokens( $line, \@idel );
+ }
- }
+ # Now adjust the alignments. Note that the side comment alignment
+ # is always at jmax-1, and there is an ending alignment at jmax.
+ my @new_alignments;
+ if ( $imax_align >= 0 ) {
+ @new_alignments[ 0 .. $imax_align ] =
+ @old_alignments[ 0 .. $imax_align ];
}
+
+ my $jmax_new = $group_lines[0]->get_jmax();
+
+ $new_alignments[ $jmax_new - 1 ] = $old_alignments[ $jmax_old - 1 ];
+ $new_alignments[$jmax_new] = $old_alignments[$jmax_old];
+ $group_lines[0]->set_alignments(@new_alignments);
+ $group_lines[1]->set_alignments(@new_alignments);
return;
}
return;
}
1;
+