package Perl::Tidy::VerticalAligner;
use strict;
use warnings;
-our $VERSION = '20200110';
-
+use Carp;
+our $VERSION = '20220217';
use Perl::Tidy::VerticalAligner::Alignment;
use Perl::Tidy::VerticalAligner::Line;
+use constant DEVEL_MODE => 0;
+
# The Perl::Tidy::VerticalAligner package collects output lines and
# attempts to line up certain common tokens, such as => and #, which are
# identified by the calling routine.
#
-# There are two main routines: valign_input and flush. Append acts as a
-# storage buffer, collecting lines into a group which can be vertically
-# aligned. When alignment is no longer possible or desirable, it dumps
-# the group to flush.
+# Usage:
+# - Initiate an object with a call to new().
+# - Write lines one-by-one with calls to valign_input().
+# - Make a final call to flush() to empty the pipeline.
#
-# valign_input -----> flush
+# The sub valign_input collects lines into groups. When a group reaches
+# the maximum possible size it is processed for alignment and output.
+# The maximum group size is reached whenerver there is a change in indentation
+# level, a blank line, a block comment, or an external flush call. The calling
+# routine may also force a break in alignment at any time.
#
-# collects writes
-# vertical one
-# groups group
+# If the calling routine needs to interrupt the output and send other text to
+# the output, it must first call flush() to empty the output pipeline. This
+# might occur for example if a block of pod text needs to be sent to the output
+# between blocks of code.
+
+# It is essential that a final call to flush() be made. Otherwise some
+# final lines of text will be lost.
+
+# Index...
+# CODE SECTION 1: Preliminary code, global definitions and sub new
+# sub new
+# CODE SECTION 2: Some Basic Utilities
+# CODE SECTION 3: Code to accept input and form groups
+# sub valign_input
+# CODE SECTION 4: Code to process comment lines
+# sub _flush_comment_lines
+# CODE SECTION 5: Code to process groups of code lines
+# sub _flush_group_lines
+# CODE SECTION 6: Output Step A
+# sub valign_output_step_A
+# CODE SECTION 7: Output Step B
+# sub valign_output_step_B
+# CODE SECTION 8: Output Step C
+# sub valign_output_step_C
+# CODE SECTION 9: Output Step D
+# sub valign_output_step_D
+# CODE SECTION 10: Summary
+# sub report_anything_unusual
+
+##################################################################
+# CODE SECTION 1: Preliminary code, global definitions and sub new
+##################################################################
+
+sub AUTOLOAD {
+
+ # Catch any undefined sub calls so that we are sure to get
+ # some diagnostic information. This sub should never be called
+ # except for a programming error.
+ our $AUTOLOAD;
+ return if ( $AUTOLOAD =~ /\bDESTROY$/ );
+ my ( $pkg, $fname, $lno ) = caller();
+ my $my_package = __PACKAGE__;
+ print STDERR <<EOM;
+======================================================================
+Error detected in package '$my_package', version $VERSION
+Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
+Called from package: '$pkg'
+Called from File '$fname' at line '$lno'
+This error is probably due to a recent programming change
+======================================================================
+EOM
+ exit 1;
+}
+
+sub DESTROY {
+
+ # required to avoid call to AUTOLOAD in some versions of perl
+}
+
+sub Die {
+ my ($msg) = @_;
+ Perl::Tidy::Die($msg);
+ croak "unexpected return from Perl::Tidy::Die";
+}
+
+sub Fault {
+ my ($msg) = @_;
+
+ # This routine is called for errors that really should not occur
+ # except if there has been a bug introduced by a recent program change.
+ # Please add comments at calls to Fault to explain why the call
+ # should not occur, and where to look to fix it.
+ my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
+ my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
+ my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
+ my $input_stream_name = get_input_stream_name();
+
+ Die(<<EOM);
+==============================================================================
+While operating on input stream with name: '$input_stream_name'
+A fault was detected at line $line0 of sub '$subroutine1'
+in file '$filename1'
+which was called from line $line1 of sub '$subroutine2'
+Message: '$msg'
+This is probably an error introduced by a recent programming change.
+Perl::Tidy::VerticalAligner.pm reports VERSION='$VERSION'.
+==============================================================================
+EOM
+
+ # We shouldn't get here, but this return is to keep Perl-Critic from
+ # complaining.
+ return;
+}
BEGIN {
- # Caution: these debug flags produce a lot of output
- # They should all be 0 except when debugging small scripts
+ # Define the fixed indexes for variables in $self, which is an array
+ # reference. Note the convention of leading and trailing underscores to
+ # keep them unique.
+ # Do not combine with other BEGIN blocks (c101).
+ my $i = 0;
+ use constant {
+ _file_writer_object_ => $i++,
+ _logger_object_ => $i++,
+ _diagnostics_object_ => $i++,
+ _length_function_ => $i++,
+
+ _rOpts_ => $i++,
+ _rOpts_indent_columns_ => $i++,
+ _rOpts_tabs_ => $i++,
+ _rOpts_entab_leading_whitespace_ => $i++,
+ _rOpts_fixed_position_side_comment_ => $i++,
+ _rOpts_minimum_space_to_comment_ => $i++,
+ _rOpts_valign_code_ => $i++,
+ _rOpts_valign_block_comments_ => $i++,
+ _rOpts_valign_side_comments_ => $i++,
+
+ _last_level_written_ => $i++,
+ _last_side_comment_column_ => $i++,
+ _last_side_comment_line_number_ => $i++,
+ _last_side_comment_length_ => $i++,
+ _last_side_comment_level_ => $i++,
+ _outdented_line_count_ => $i++,
+ _first_outdented_line_at_ => $i++,
+ _last_outdented_line_at_ => $i++,
+ _consecutive_block_comments_ => $i++,
+
+ _rgroup_lines_ => $i++,
+ _group_level_ => $i++,
+ _group_type_ => $i++,
+ _group_maximum_line_length_ => $i++,
+ _zero_count_ => $i++,
+ _last_leading_space_count_ => $i++,
+ _comment_leading_space_count_ => $i++,
+ };
+
+ # Debug flag. This is a relic from the original program development
+ # looking for problems with tab characters. Caution: this debug flag can
+ # produce a lot of output It should be 0 except when debugging small
+ # scripts.
- use constant VALIGN_DEBUG_FLAG_APPEND => 0;
- use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
- use constant VALIGN_DEBUG_FLAG_TERNARY => 0;
- use constant VALIGN_DEBUG_FLAG_TABS => 0;
+ use constant DEBUG_TABS => 0;
my $debug_warning = sub {
print STDOUT "VALIGN_DEBUGGING with key $_[0]\n";
return;
};
- VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND');
- VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
- VALIGN_DEBUG_FLAG_TERNARY && $debug_warning->('TERNARY');
- VALIGN_DEBUG_FLAG_TABS && $debug_warning->('TABS');
-
+ DEBUG_TABS && $debug_warning->('TABS');
}
-use vars qw(
- $vertical_aligner_self
- $maximum_alignment_index
- $ralignment_list
- $maximum_jmax_seen
- $minimum_jmax_seen
- $previous_minimum_jmax_seen
- $previous_maximum_jmax_seen
- @group_lines
- $group_level
- $group_type
- $group_maximum_gap
- $marginal_match
- $last_level_written
- $last_leading_space_count
- $extra_indent_ok
- $zero_count
- $last_comment_column
- $last_side_comment_line_number
- $last_side_comment_length
- $last_side_comment_level
- $outdented_line_count
- $first_outdented_line_at
- $last_outdented_line_at
- $diagnostics_object
- $logger_object
- $file_writer_object
- @side_comment_history
- $comment_leading_space_count
- $is_matching_terminal_line
- $consecutive_block_comments
-
- $cached_line_text
- $cached_line_type
- $cached_line_flag
- $cached_seqno
- $cached_line_valid
- $cached_line_leading_space_count
- $cached_seqno_string
-
- $valign_buffer_filling
- @valign_buffer
-
- $seqno_string
- $last_nonblank_seqno_string
-
- $rOpts
-
- $rOpts_maximum_line_length
- $rOpts_variable_maximum_line_length
- $rOpts_continuation_indentation
- $rOpts_indent_columns
- $rOpts_tabs
- $rOpts_entab_leading_whitespace
- $rOpts_valign
-
- $rOpts_fixed_position_side_comment
- $rOpts_minimum_space_to_comment
+# GLOBAL variables
+my (
+
+ %valign_control_hash,
+ $valign_control_default,
);
-sub initialize {
-
- (
- my $class, $rOpts, $file_writer_object, $logger_object,
- $diagnostics_object
- ) = @_;
-
- # variables describing the entire space group:
- $ralignment_list = [];
- $group_level = 0;
- $last_level_written = -1;
- $extra_indent_ok = 0; # can we move all lines to the right?
- $last_side_comment_length = 0;
- $maximum_jmax_seen = 0;
- $minimum_jmax_seen = 0;
- $previous_minimum_jmax_seen = 0;
- $previous_maximum_jmax_seen = 0;
-
- # variables describing each line of the group
- @group_lines = (); # list of all lines in group
-
- $outdented_line_count = 0;
- $first_outdented_line_at = 0;
- $last_outdented_line_at = 0;
- $last_side_comment_line_number = 0;
- $last_side_comment_level = -1;
- $is_matching_terminal_line = 0;
-
- # most recent 3 side comments; [ line number, column ]
- $side_comment_history[0] = [ -300, 0 ];
- $side_comment_history[1] = [ -200, 0 ];
- $side_comment_history[2] = [ -100, 0 ];
-
- # valign_output_step_B cache:
- $cached_line_text = "";
- $cached_line_type = 0;
- $cached_line_flag = 0;
- $cached_seqno = 0;
- $cached_line_valid = 0;
- $cached_line_leading_space_count = 0;
- $cached_seqno_string = "";
-
- # string of sequence numbers joined together
- $seqno_string = "";
- $last_nonblank_seqno_string = "";
-
- # frequently used parameters
- $rOpts_indent_columns = $rOpts->{'indent-columns'};
- $rOpts_tabs = $rOpts->{'tabs'};
- $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
- $rOpts_fixed_position_side_comment =
+sub check_options {
+
+ # This routine is called to check the user-supplied run parameters
+ # and to configure the control hashes to them.
+ my ($rOpts) = @_;
+
+ # All alignments are done by default
+ %valign_control_hash = ();
+ $valign_control_default = 1;
+
+ # If -vil=s is entered without -vxl, assume -vxl='*'
+ if ( !$rOpts->{'valign-exclusion-list'}
+ && $rOpts->{'valign-inclusion-list'} )
+ {
+ $rOpts->{'valign-exclusion-list'} = '*';
+ }
+
+ # See if the user wants to exclude any alignment types ...
+ if ( $rOpts->{'valign-exclusion-list'} ) {
+
+ # The inclusion list is only relevant if there is an exclusion list
+ if ( $rOpts->{'valign-inclusion-list'} ) {
+ my @vil = split /\s+/, $rOpts->{'valign-inclusion-list'};
+ @valign_control_hash{@vil} = (1) x scalar(@vil);
+ }
+
+ # Note that the -vxl list is done after -vil, so -vxl has priority
+ # in the event of duplicate entries.
+ my @vxl = split /\s+/, $rOpts->{'valign-exclusion-list'};
+ @valign_control_hash{@vxl} = (0) x scalar(@vxl);
+
+ # Optimization: revert to defaults if no exclusions.
+ # This could happen with -vxl=' ' and any -vil list
+ if ( !@vxl ) {
+ %valign_control_hash = ();
+ }
+
+ # '$valign_control_default' applies to types not in the hash:
+ # - If a '*' was entered then set it to be that default type
+ # - Otherwise, leave it set it to 1
+ if ( defined( $valign_control_hash{'*'} ) ) {
+ $valign_control_default = $valign_control_hash{'*'};
+ }
+
+ # Side comments are controlled separately and must be removed
+ # if given in a list.
+ if (%valign_control_hash) {
+ $valign_control_hash{'#'} = 1;
+ }
+ }
+
+ return;
+}
+
+sub new {
+
+ my ( $class, @args ) = @_;
+
+ my %defaults = (
+ rOpts => undef,
+ file_writer_object => undef,
+ logger_object => undef,
+ diagnostics_object => undef,
+ length_function => sub { return length( $_[0] ) },
+ );
+ my %args = ( %defaults, @args );
+
+ # Initialize other caches and buffers
+ initialize_step_B_cache();
+ initialize_valign_buffer();
+ initialize_leading_string_cache();
+ initialize_decode();
+ set_logger_object( $args{logger_object} );
+
+ # Initialize all variables in $self.
+ # To add an item to $self, first define a new constant index in the BEGIN
+ # section.
+ my $self = [];
+
+ # objects
+ $self->[_file_writer_object_] = $args{file_writer_object};
+ $self->[_logger_object_] = $args{logger_object};
+ $self->[_diagnostics_object_] = $args{diagnostics_object};
+ $self->[_length_function_] = $args{length_function};
+
+ # shortcuts to user options
+ my $rOpts = $args{rOpts};
+
+ $self->[_rOpts_] = $rOpts;
+ $self->[_rOpts_indent_columns_] = $rOpts->{'indent-columns'};
+ $self->[_rOpts_tabs_] = $rOpts->{'tabs'};
+ $self->[_rOpts_entab_leading_whitespace_] =
+ $rOpts->{'entab-leading-whitespace'};
+ $self->[_rOpts_fixed_position_side_comment_] =
$rOpts->{'fixed-position-side-comment'};
- $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
- $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
- $rOpts_variable_maximum_line_length =
- $rOpts->{'variable-maximum-line-length'};
- $rOpts_valign = $rOpts->{'valign'};
+ $self->[_rOpts_minimum_space_to_comment_] =
+ $rOpts->{'minimum-space-to-comment'};
+ $self->[_rOpts_valign_code_] = $rOpts->{'valign-code'};
+ $self->[_rOpts_valign_block_comments_] = $rOpts->{'valign-block-comments'};
+ $self->[_rOpts_valign_side_comments_] = $rOpts->{'valign-side-comments'};
+
+ # Batch of lines being collected
+ $self->[_rgroup_lines_] = [];
+ $self->[_group_level_] = 0;
+ $self->[_group_type_] = "";
+ $self->[_group_maximum_line_length_] = undef;
+ $self->[_zero_count_] = 0;
+ $self->[_comment_leading_space_count_] = 0;
+ $self->[_last_leading_space_count_] = 0;
+
+ # Memory of what has been processed
+ $self->[_last_level_written_] = -1;
+ $self->[_last_side_comment_column_] = 0;
+ $self->[_last_side_comment_line_number_] = 0;
+ $self->[_last_side_comment_length_] = 0;
+ $self->[_last_side_comment_level_] = -1;
+ $self->[_outdented_line_count_] = 0;
+ $self->[_first_outdented_line_at_] = 0;
+ $self->[_last_outdented_line_at_] = 0;
+ $self->[_consecutive_block_comments_] = 0;
+
+ bless $self, $class;
+ return $self;
+}
+
+#################################
+# CODE SECTION 2: Basic Utilities
+#################################
+
+sub flush {
- $consecutive_block_comments = 0;
- forget_side_comment();
+ # flush() is the external call to completely empty the pipeline.
+ my ($self) = @_;
- initialize_for_new_group();
+ # push things out the pipline...
- $vertical_aligner_self = {};
- bless $vertical_aligner_self, $class;
- return $vertical_aligner_self;
+ # push out any current group lines
+ $self->_flush_group_lines();
+
+ # then anything left in the cache of step_B
+ $self->_flush_cache();
+
+ # then anything left in the buffer of step_C
+ $self->dump_valign_buffer();
+
+ return;
}
sub initialize_for_new_group {
- @group_lines = ();
- $maximum_alignment_index = -1; # alignments in current group
- $zero_count = 0; # count consecutive lines without tokens
- $group_maximum_gap = 0; # largest gap introduced
- $group_type = "";
- $marginal_match = 0;
- $comment_leading_space_count = 0;
- $last_leading_space_count = 0;
+ my ($self) = @_;
+
+ $self->[_rgroup_lines_] = [];
+ $self->[_group_type_] = "";
+ $self->[_zero_count_] = 0;
+ $self->[_comment_leading_space_count_] = 0;
+ $self->[_last_leading_space_count_] = 0;
+ $self->[_group_maximum_line_length_] = undef;
+
+ # Note that the value for _group_level_ is
+ # handled separately in sub valign_input
return;
}
+sub group_line_count {
+ return +@{ $_[0]->[_rgroup_lines_] };
+}
+
# interface to Perl::Tidy::Diagnostics routines
+# For debugging; not currently used
sub write_diagnostics {
- my $msg = shift;
+ my ( $self, $msg ) = @_;
+ my $diagnostics_object = $self->[_diagnostics_object_];
if ($diagnostics_object) {
$diagnostics_object->write_diagnostics($msg);
}
return;
}
-# interface to Perl::Tidy::Logger routines
-sub warning {
- my ($msg) = @_;
- if ($logger_object) {
- $logger_object->warning($msg);
+{ ## begin closure for logger routines
+ my $logger_object;
+
+ # Called once per file to initialize the logger object
+ sub set_logger_object {
+ $logger_object = shift;
+ return;
}
- return;
-}
-sub write_logfile_entry {
- my ($msg) = @_;
- if ($logger_object) {
- $logger_object->write_logfile_entry($msg);
+ sub get_logger_object {
+ return $logger_object;
}
- return;
-}
-sub report_definite_bug {
- if ($logger_object) {
- $logger_object->report_definite_bug();
+ sub get_input_stream_name {
+ my $input_stream_name = "";
+ if ($logger_object) {
+ $input_stream_name = $logger_object->get_input_stream_name();
+ }
+ return $input_stream_name;
+ }
+
+ sub warning {
+ my ($msg) = @_;
+ if ($logger_object) {
+ $logger_object->warning($msg);
+ }
+ return;
+ }
+
+ sub write_logfile_entry {
+ my ($msg) = @_;
+ if ($logger_object) {
+ $logger_object->write_logfile_entry($msg);
+ }
+ return;
}
- return;
}
sub get_cached_line_count {
my $self = shift;
- return @group_lines + ( $cached_line_type ? 1 : 0 );
-}
-
-sub get_spaces {
-
- # return the number of leading spaces associated with an indentation
- # variable $indentation is either a constant number of spaces or an
- # object with a get_spaces method.
- my $indentation = shift;
- return ref($indentation) ? $indentation->get_spaces() : $indentation;
+ return $self->group_line_count() + ( get_cached_line_type() ? 1 : 0 );
}
sub get_recoverable_spaces {
return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
}
-sub get_stack_depth {
-
- my $indentation = shift;
- return ref($indentation) ? $indentation->get_stack_depth() : 0;
-}
-
-sub make_alignment {
- my ( $col, $token ) = @_;
-
- # make one new alignment at column $col which aligns token $token
- ++$maximum_alignment_index;
-
- #my $alignment = new Perl::Tidy::VerticalAligner::Alignment(
- my $nlines = @group_lines;
- my $alignment = Perl::Tidy::VerticalAligner::Alignment->new(
- column => $col,
- starting_column => $col,
- matching_token => $token,
- starting_line => $nlines - 1,
- ending_line => $nlines - 1,
- serial_number => $maximum_alignment_index,
- );
- $ralignment_list->[$maximum_alignment_index] = $alignment;
- return $alignment;
-}
-
-sub dump_alignments {
- print STDOUT
-"Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
- for my $i ( 0 .. $maximum_alignment_index ) {
- my $column = $ralignment_list->[$i]->get_column();
- my $starting_column = $ralignment_list->[$i]->get_starting_column();
- my $matching_token = $ralignment_list->[$i]->get_matching_token();
- my $starting_line = $ralignment_list->[$i]->get_starting_line();
- my $ending_line = $ralignment_list->[$i]->get_ending_line();
- print STDOUT
-"$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
- }
- return;
-}
-
-sub save_alignment_columns {
- for my $i ( 0 .. $maximum_alignment_index ) {
- $ralignment_list->[$i]->save_column();
- }
- return;
-}
+######################################################
+# CODE SECTION 3: Code to accept input and form groups
+######################################################
-sub restore_alignment_columns {
- for my $i ( 0 .. $maximum_alignment_index ) {
- $ralignment_list->[$i]->restore_column();
- }
- return;
-}
+sub push_group_line {
-sub forget_side_comment {
- $last_comment_column = 0;
+ my ( $self, $new_line ) = @_;
+ my $rgroup_lines = $self->[_rgroup_lines_];
+ push @{$rgroup_lines}, $new_line;
return;
}
-sub maximum_line_length_for_level {
-
- # return maximum line length for line starting with a given level
- my $maximum_line_length = $rOpts_maximum_line_length;
- if ($rOpts_variable_maximum_line_length) {
- my $level = shift;
- if ( $level < 0 ) { $level = 0 }
- $maximum_line_length += $level * $rOpts_indent_columns;
- }
- return $maximum_line_length;
-}
+use constant DEBUG_VALIGN => 0;
+use constant SC_LONG_LINE_DIFF => 12;
-sub push_group_line {
+my %is_closing_token;
- my ($new_line) = @_;
- push @group_lines, $new_line;
- return;
+BEGIN {
+ my @q = qw< } ) ] >;
+ @is_closing_token{@q} = (1) x scalar(@q);
}
sub valign_input {
# Place one line in the current vertical group.
#
- # The input parameters are:
- # $level = indentation level of this line
- # $rfields = reference to array of fields
- # $rpatterns = reference to array of patterns, one per field
- # $rtokens = reference to array of tokens starting fields 1,2,..
+ # The key input parameters describing each line are:
+ # $level = indentation level of this line
+ # $rfields = ref to array of fields
+ # $rpatterns = ref to array of patterns, one per field
+ # $rtokens = ref to array of tokens starting fields 1,2,..
+ # $rfield_lengths = ref to array of field display widths
#
# Here is an example of what this package does. In this example,
# we are trying to line up both the '=>' and the '#'.
# the matching tokens, and the last one tracks the maximum line length.
#
# Each time a new line comes in, it joins the current vertical
- # group if possible. Otherwise it causes the current group to be dumped
+ # group if possible. Otherwise it causes the current group to be flushed
# and a new group is started.
#
# For each new group member, the column locations are increased, as
# side comments. Tabs in these fields can mess up the column counting.
# The log file warns the user if there are any such tabs.
- my ( $rline_hash, $rfields, $rtokens, $rpatterns ) = @_;
+ my ( $self, $rline_hash ) = @_;
+
my $level = $rline_hash->{level};
my $level_end = $rline_hash->{level_end};
my $indentation = $rline_hash->{indentation};
- my $is_forced_break = $rline_hash->{is_forced_break};
+ my $list_seqno = $rline_hash->{list_seqno};
my $outdent_long_lines = $rline_hash->{outdent_long_lines};
my $is_terminal_ternary = $rline_hash->{is_terminal_ternary};
- my $is_terminal_statement = $rline_hash->{is_terminal_statement};
- my $do_not_pad = $rline_hash->{do_not_pad};
my $rvertical_tightness_flags = $rline_hash->{rvertical_tightness_flags};
- my $level_jump = $rline_hash->{level_jump};
+ my $break_alignment_before = $rline_hash->{break_alignment_before};
+ my $break_alignment_after = $rline_hash->{break_alignment_after};
+ my $Kend = $rline_hash->{Kend};
+ my $ci_level = $rline_hash->{ci_level};
+ my $maximum_line_length = $rline_hash->{maximum_line_length};
+ my $forget_side_comment = $rline_hash->{forget_side_comment};
+ my $rline_alignment = $rline_hash->{rline_alignment};
+
+ my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
+ @{$rline_alignment};
+
+ # The index '$Kend' is a value which passed along with the line text to sub
+ # 'write_code_line' for a convergence check.
# number of fields is $jmax
# number of tokens between fields is $jmax-1
my $jmax = @{$rfields} - 1;
- my $leading_space_count = get_spaces($indentation);
+ my $leading_space_count =
+ ref($indentation) ? $indentation->get_spaces() : $indentation;
# set outdented flag to be sure we either align within statements or
# across statement boundaries, but not both.
- my $is_outdented = $last_leading_space_count > $leading_space_count;
- $last_leading_space_count = $leading_space_count;
+ my $is_outdented =
+ $self->[_last_leading_space_count_] > $leading_space_count;
+ $self->[_last_leading_space_count_] = $leading_space_count;
- # Patch: undo for hanging side comment
+ # Identify a hanging side comment. Hanging side comments have an empty
+ # initial field.
my $is_hanging_side_comment =
( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
+
+ # Undo outdented flag for a hanging side comment
$is_outdented = 0 if $is_hanging_side_comment;
- # Forget side comment alignment after seeing 2 or more block comments
- my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
+ # Identify a block comment.
+ my $is_block_comment = $jmax == 0 && substr( $rfields->[0], 0, 1 ) eq '#';
+
+ # Block comment .. update count
if ($is_block_comment) {
- $consecutive_block_comments++;
+ $self->[_consecutive_block_comments_]++;
}
+
+ # Not a block comment ..
+ # Forget side comment column if we saw 2 or more block comments,
+ # and reset the count
else {
- if ( $consecutive_block_comments > 1 ) { forget_side_comment() }
- $consecutive_block_comments = 0;
+
+ if ( $self->[_consecutive_block_comments_] > 1 ) {
+ $self->forget_side_comment();
+ }
+ $self->[_consecutive_block_comments_] = 0;
+ }
+
+ # Reset side comment location if we are entering a new block from level 0.
+ # This is intended to keep them from drifting too far to the right.
+ if ($forget_side_comment) {
+ $self->forget_side_comment();
}
- VALIGN_DEBUG_FLAG_APPEND0 && do {
- my $nlines = @group_lines;
+ my $is_balanced_line = $level_end == $level;
+
+ my $group_level = $self->[_group_level_];
+ my $group_maximum_line_length = $self->[_group_maximum_line_length_];
+
+ DEBUG_VALIGN && do {
+ my $nlines = $self->group_line_count();
print STDOUT
-"APPEND0: entering lines=$nlines new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break, level_jump=$level_jump, level=$level, group_level=$group_level, level_jump=$level_jump\n";
+"Entering valign_input: lines=$nlines new #fields= $jmax, leading_count=$leading_space_count, level=$level, group_level=$group_level, level_end=$level_end\n";
};
# Validate cached line if necessary: If we can produce a container
# with just 2 lines total by combining an existing cached opening
# token with the closing token to follow, then we will mark both
# cached flags as valid.
- if ($rvertical_tightness_flags) {
- if ( @group_lines <= 1
- && $cached_line_type
- && $cached_seqno
- && $rvertical_tightness_flags->[2]
- && $rvertical_tightness_flags->[2] == $cached_seqno )
- {
- $rvertical_tightness_flags->[3] ||= 1;
- $cached_line_valid ||= 1;
+ my $cached_line_type = get_cached_line_type();
+ if ($cached_line_type) {
+ my $cached_line_opening_flag = get_cached_line_opening_flag();
+ if ($rvertical_tightness_flags) {
+ my $cached_seqno = get_cached_seqno();
+ if ( $cached_seqno
+ && $rvertical_tightness_flags->{_vt_seqno}
+ && $rvertical_tightness_flags->{_vt_seqno} == $cached_seqno )
+ {
+
+ # Fix for b1187 and b1188: Normally this step is only done
+ # if the number of existing lines is 0 or 1. But to prevent
+ # blinking, this range can be controlled by the caller.
+ # If zero values are given we fall back on the range 0 to 1.
+ my $line_count = $self->group_line_count();
+ my $min_lines = $rvertical_tightness_flags->{_vt_min_lines};
+ my $max_lines = $rvertical_tightness_flags->{_vt_max_lines};
+ $min_lines = 0 unless ($min_lines);
+ $max_lines = 1 unless ($max_lines);
+ if ( ( $line_count >= $min_lines )
+ && ( $line_count <= $max_lines ) )
+ {
+ $rvertical_tightness_flags->{_vt_valid_flag} ||= 1;
+ set_cached_line_valid(1);
+ }
+ }
}
- }
- # do not join an opening block brace with an unbalanced line
- # unless requested with a flag value of 2
- if ( $cached_line_type == 3
- && !@group_lines
- && $cached_line_flag < 2
- && $level_jump != 0 )
- {
- $cached_line_valid = 0;
+ # do not join an opening block brace with an unbalanced line
+ # unless requested with a flag value of 2
+ if ( $cached_line_type == 3
+ && !$self->group_line_count()
+ && $cached_line_opening_flag < 2
+ && !$is_balanced_line )
+ {
+ set_cached_line_valid(0);
+ }
}
- # patch until new aligner is finished
- if ($do_not_pad) { my_flush() }
-
# shouldn't happen:
if ( $level < 0 ) { $level = 0 }
# do not align code across indentation level changes
- # or if vertical alignment is turned off for debugging
- if ( $level != $group_level || $is_outdented || !$rOpts_valign ) {
-
- # we are allowed to shift a group of lines to the right if its
- # level is greater than the previous and next group
- $extra_indent_ok =
- ( $level < $group_level && $last_level_written < $group_level );
-
- my_flush();
-
- # If we know that this line will get flushed out by itself because
- # of level changes, we can leave the extra_indent_ok flag set.
- # That way, if we get an external flush call, we will still be
- # able to do some -lp alignment if necessary.
- $extra_indent_ok = ( $is_terminal_statement && $level > $group_level );
+ # or changes in the maximum line length
+ # or if vertical alignment is turned off
+ if (
+ $level != $group_level
+ || ( $group_maximum_line_length
+ && $maximum_line_length != $group_maximum_line_length )
+ || $is_outdented
+ || ( $is_block_comment && !$self->[_rOpts_valign_block_comments_] )
+ || ( !$is_block_comment
+ && !$self->[_rOpts_valign_side_comments_]
+ && !$self->[_rOpts_valign_code_] )
+ )
+ {
- $group_level = $level;
+ $self->_flush_group_lines( $level - $group_level );
- # wait until after the above flush to get the leading space
- # count because it may have been changed if the -icp flag is in
- # effect
- $leading_space_count = get_spaces($indentation);
+ $group_level = $level;
+ $self->[_group_level_] = $group_level;
+ $self->[_group_maximum_line_length_] = $maximum_line_length;
+ # Update leading spaces after the above flush because the leading space
+ # count may have been changed if the -icp flag is in effect
+ $leading_space_count =
+ ref($indentation) ? $indentation->get_spaces() : $indentation;
}
# --------------------------------------------------------------------
# Collect outdentable block COMMENTS
# --------------------------------------------------------------------
my $is_blank_line = "";
- if ( $group_type eq 'COMMENT' ) {
+ if ( $self->[_group_type_] eq 'COMMENT' ) {
if (
(
$is_block_comment
&& $outdent_long_lines
- && $leading_space_count == $comment_leading_space_count
+ && $leading_space_count ==
+ $self->[_comment_leading_space_count_]
)
|| $is_blank_line
)
{
- push_group_line( $rfields->[0] );
+
+ # Note that for a comment group we are not storing a line
+ # but rather just the text and its length.
+ $self->push_group_line(
+ [ $rfields->[0], $rfield_lengths->[0], $Kend ] );
return;
}
else {
- my_flush();
+ $self->_flush_group_lines();
}
}
+ my $rgroup_lines = $self->[_rgroup_lines_];
+ if ( $break_alignment_before && @{$rgroup_lines} ) {
+ $rgroup_lines->[-1]->set_end_group(1);
+ }
+
# --------------------------------------------------------------------
# add dummy fields for terminal ternary
# --------------------------------------------------------------------
my $j_terminal_match;
- if ( $is_terminal_ternary && @group_lines ) {
+ if ( $is_terminal_ternary && @{$rgroup_lines} ) {
$j_terminal_match =
- fix_terminal_ternary( $group_lines[-1], $rfields, $rtokens,
- $rpatterns );
+ fix_terminal_ternary( $rgroup_lines->[-1], $rfields, $rtokens,
+ $rpatterns, $rfield_lengths, $group_level, );
$jmax = @{$rfields} - 1;
}
# add dummy fields for else statement
# --------------------------------------------------------------------
- if ( $rfields->[0] =~ /^else\s*$/
- && @group_lines
- && $level_jump == 0 )
+ # Note the trailing space after 'else' here. If there were no space between
+ # the else and the next '{' then we would not be able to do vertical
+ # alignment of the '{'.
+ if ( $rfields->[0] eq 'else '
+ && @{$rgroup_lines}
+ && $is_balanced_line )
{
$j_terminal_match =
- fix_terminal_else( $group_lines[-1], $rfields, $rtokens, $rpatterns );
+ fix_terminal_else( $rgroup_lines->[-1], $rfields, $rtokens,
+ $rpatterns, $rfield_lengths );
$jmax = @{$rfields} - 1;
}
# Handle simple line of code with no fields to match.
# --------------------------------------------------------------------
if ( $jmax <= 0 ) {
- $zero_count++;
+ $self->[_zero_count_]++;
- if ( @group_lines
- && !get_recoverable_spaces( $group_lines[0]->get_indentation() ) )
+ if ( @{$rgroup_lines}
+ && !get_recoverable_spaces( $rgroup_lines->[0]->get_indentation() )
+ )
{
# flush the current group if it has some aligned columns..
- if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() }
-
- # flush current group if we are just collecting side comments..
- elsif (
-
- # ...and we haven't seen a comment lately
- ( $zero_count > 3 )
-
- # ..or if this new line doesn't fit to the left of the comments
- || ( ( $leading_space_count + length( $rfields->[0] ) ) >
- $group_lines[0]->get_column(0) )
- )
+ # or we haven't seen a comment lately
+ if ( $rgroup_lines->[0]->get_jmax() > 1
+ || $self->[_zero_count_] > 3 )
{
- my_flush();
+ $self->_flush_group_lines();
}
}
# start new COMMENT group if this comment may be outdented
if ( $is_block_comment
&& $outdent_long_lines
- && !@group_lines )
+ && !$self->group_line_count() )
{
- $group_type = 'COMMENT';
- $comment_leading_space_count = $leading_space_count;
- push_group_line( $rfields->[0] );
+ $self->[_group_type_] = 'COMMENT';
+ $self->[_comment_leading_space_count_] = $leading_space_count;
+ $self->[_group_maximum_line_length_] = $maximum_line_length;
+ $self->push_group_line(
+ [ $rfields->[0], $rfield_lengths->[0], $Kend ] );
return;
}
# just write this line directly if no current group, no side comment,
# and no space recovery is needed.
- if ( !@group_lines && !get_recoverable_spaces($indentation) ) {
- valign_output_step_B( $leading_space_count, $rfields->[0], 0,
- $outdent_long_lines, $rvertical_tightness_flags, $level );
+ if ( !$self->group_line_count()
+ && !get_recoverable_spaces($indentation) )
+ {
+
+ $self->valign_output_step_B(
+ {
+ leading_space_count => $leading_space_count,
+ line => $rfields->[0],
+ line_length => $rfield_lengths->[0],
+ side_comment_length => 0,
+ outdent_long_lines => $outdent_long_lines,
+ rvertical_tightness_flags => $rvertical_tightness_flags,
+ level => $level,
+ level_end => $level_end,
+ Kend => $Kend,
+ maximum_line_length => $maximum_line_length,
+ }
+ );
return;
}
}
else {
- $zero_count = 0;
+ $self->[_zero_count_] = 0;
}
- # programming check: (shouldn't happen)
- # an error here implies an incorrect call was made
- if ( @{$rfields} && ( @{$rtokens} != ( @{$rfields} - 1 ) ) ) {
- my $nt = @{$rtokens};
- my $nf = @{$rfields};
- warning(
-"Program bug in Perl::Tidy::VerticalAligner - number of tokens = $nt should be one less than number of fields: $nf)\n"
- );
- report_definite_bug();
+ # --------------------------------------------------------------------
+ # It simplifies things to create a zero length side comment
+ # if none exists.
+ # --------------------------------------------------------------------
+ if ( ( $jmax == 0 ) || ( $rtokens->[ $jmax - 1 ] ne '#' ) ) {
+ $jmax += 1;
+ $rtokens->[ $jmax - 1 ] = '#';
+ $rfields->[$jmax] = '';
+ $rfield_lengths->[$jmax] = 0;
+ $rpatterns->[$jmax] = '#';
}
- my $maximum_line_length_for_level = maximum_line_length_for_level($level);
# --------------------------------------------------------------------
# create an object to hold this line
# --------------------------------------------------------------------
my $new_line = Perl::Tidy::VerticalAligner::Line->new(
- jmax => $jmax,
- jmax_original_line => $jmax,
- rtokens => $rtokens,
- rfields => $rfields,
- rpatterns => $rpatterns,
- indentation => $indentation,
- leading_space_count => $leading_space_count,
- outdent_long_lines => $outdent_long_lines,
- list_type => "",
- is_hanging_side_comment => $is_hanging_side_comment,
- maximum_line_length => $maximum_line_length_for_level,
- rvertical_tightness_flags => $rvertical_tightness_flags,
- is_terminal_ternary => $is_terminal_ternary,
- j_terminal_match => $j_terminal_match,
+ {
+ jmax => $jmax,
+ rtokens => $rtokens,
+ rfields => $rfields,
+ rpatterns => $rpatterns,
+ rfield_lengths => $rfield_lengths,
+ indentation => $indentation,
+ leading_space_count => $leading_space_count,
+ outdent_long_lines => $outdent_long_lines,
+ list_seqno => $list_seqno,
+ list_type => "",
+ is_hanging_side_comment => $is_hanging_side_comment,
+ rvertical_tightness_flags => $rvertical_tightness_flags,
+ is_terminal_ternary => $is_terminal_ternary,
+ j_terminal_match => $j_terminal_match,
+ end_group => $break_alignment_after,
+ Kend => $Kend,
+ ci_level => $ci_level,
+ level => $level,
+ level_end => $level_end,
+ imax_pair => -1,
+ maximum_line_length => $maximum_line_length,
+ }
);
- # --------------------------------------------------------------------
- # It simplifies things to create a zero length side comment
- # if none exists.
- # --------------------------------------------------------------------
- make_side_comment( $new_line, $level_end );
-
# --------------------------------------------------------------------
# Decide if this is a simple list of items.
- # There are 3 list types: none, comma, comma-arrow.
- # We use this below to be less restrictive in deciding what to align.
+ # We use this to be less restrictive in deciding what to align.
# --------------------------------------------------------------------
- if ($is_forced_break) {
- decide_if_list($new_line);
- }
+ decide_if_list($new_line) if ($list_seqno);
# --------------------------------------------------------------------
# Append this line to the current group (or start new group)
# --------------------------------------------------------------------
- if ( !@group_lines ) {
- add_to_group($new_line);
- }
- else {
- push_group_line($new_line);
- }
+
+ $self->push_group_line($new_line);
+ $self->[_group_maximum_line_length_] = $maximum_line_length;
# output this group if it ends in a terminal else or ternary line
if ( defined($j_terminal_match) ) {
- my_flush();
+ $self->_flush_group_lines();
}
# Force break after jump to lower level
- if ( $level_jump < 0 ) {
- my_flush();
+ elsif ($level_end < $level
+ || $is_closing_token{ substr( $rfields->[0], 0, 1 ) } )
+ {
+ $self->_flush_group_lines(-1);
}
# --------------------------------------------------------------------
# Some old debugging stuff
# --------------------------------------------------------------------
- VALIGN_DEBUG_FLAG_APPEND && do {
- print STDOUT "APPEND fields:";
+ DEBUG_VALIGN && do {
+ print STDOUT "exiting valign_input fields:";
dump_array( @{$rfields} );
- print STDOUT "APPEND tokens:";
+ print STDOUT "exiting valign_input tokens:";
dump_array( @{$rtokens} );
- print STDOUT "APPEND patterns:";
+ print STDOUT "exiting valign_input patterns:";
dump_array( @{$rpatterns} );
- dump_alignments();
};
return;
sub join_hanging_comment {
- my $line = shift;
- my $jmax = $line->get_jmax();
- return 0 unless $jmax == 1; # must be 2 fields
- my $rtokens = $line->get_rtokens();
- return 0 unless $rtokens->[0] eq '#'; # the second field is a comment..
- my $rfields = $line->get_rfields();
- return 0 unless $rfields->[0] =~ /^\s*$/; # the first field is empty...
- my $old_line = shift;
+ # Add dummy fields to a hanging side comment to make it look
+ # like the first line in its potential group. This simplifies
+ # the coding.
+ my ( $new_line, $old_line ) = @_;
+
+ my $jmax = $new_line->get_jmax();
+
+ # must be 2 fields
+ return 0 unless $jmax == 1;
+ my $rtokens = $new_line->get_rtokens();
+
+ # the second field must be a comment
+ return 0 unless $rtokens->[0] eq '#';
+ my $rfields = $new_line->get_rfields();
+
+ # the first field must be empty
+ return 0 unless $rfields->[0] =~ /^\s*$/;
+
+ # the current line must have fewer fields
my $maximum_field_index = $old_line->get_jmax();
return 0
- unless $maximum_field_index > $jmax; # the current line has more fields
- my $rpatterns = $line->get_rpatterns();
+ unless $maximum_field_index > $jmax;
- $line->set_is_hanging_side_comment(1);
+ # looks ok..
+ my $rpatterns = $new_line->get_rpatterns();
+ my $rfield_lengths = $new_line->get_rfield_lengths();
+
+ $new_line->set_is_hanging_side_comment(1);
$jmax = $maximum_field_index;
- $line->set_jmax($jmax);
+ $new_line->set_jmax($jmax);
$rfields->[$jmax] = $rfields->[1];
+ $rfield_lengths->[$jmax] = $rfield_lengths->[1];
$rtokens->[ $jmax - 1 ] = $rtokens->[0];
$rpatterns->[ $jmax - 1 ] = $rpatterns->[0];
foreach my $j ( 1 .. $jmax - 1 ) {
- $rfields->[$j] = " "; # NOTE: caused glitch unless 1 blank, why?
+ $rfields->[$j] = '';
+ $rfield_lengths->[$j] = 0;
$rtokens->[ $j - 1 ] = "";
$rpatterns->[ $j - 1 ] = "";
}
return 1;
}
-sub eliminate_old_fields {
+{ ## closure for sub decide_if_list
- my $new_line = shift;
- my $jmax = $new_line->get_jmax();
- if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
- if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }
+ my %is_comma_token;
- # there must be one previous line
- return unless ( @group_lines == 1 );
-
- my $old_line = shift;
- my $maximum_field_index = $old_line->get_jmax();
+ BEGIN {
- ###############################################
- # Moved below to allow new coding for => matches
- # return unless $maximum_field_index > $jmax;
- ###############################################
-
- # Identify specific cases where field elimination is allowed:
- # case=1: both lines have comma-separated lists, and the first
- # line has an equals
- # case=2: both lines have leading equals
-
- # case 1 is the default
- my $case = 1;
-
- # See if case 2: both lines have leading '='
- # We'll require similar leading patterns in this case
- my $old_rtokens = $old_line->get_rtokens();
- my $rtokens = $new_line->get_rtokens();
- my $rpatterns = $new_line->get_rpatterns();
- my $old_rpatterns = $old_line->get_rpatterns();
- if ( $rtokens->[0] =~ /^=>?\d*$/
- && $old_rtokens->[0] eq $rtokens->[0]
- && $old_rpatterns->[0] eq $rpatterns->[0] )
- {
- $case = 2;
+ my @q = qw( => );
+ push @q, ',';
+ @is_comma_token{@q} = (1) x scalar(@q);
}
- # not too many fewer fields in new line for case 1
- return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax );
+ sub decide_if_list {
- # case 1 must have side comment
- my $old_rfields = $old_line->get_rfields();
- return
- if ( $case == 1
- && length( $old_rfields->[$maximum_field_index] ) == 0 );
-
- my $rfields = $new_line->get_rfields();
+ my $line = shift;
- my $hid_equals = 0;
-
- my @new_alignments = ();
- my @new_fields = ();
- my @new_matching_patterns = ();
- my @new_matching_tokens = ();
-
- my $j = 0;
- my $current_field = '';
- my $current_pattern = '';
-
- # loop over all old tokens
- my $in_match = 0;
- foreach my $k ( 0 .. $maximum_field_index - 1 ) {
- $current_field .= $old_rfields->[$k];
- $current_pattern .= $old_rpatterns->[$k];
- last if ( $j > $jmax - 1 );
-
- if ( $old_rtokens->[$k] eq $rtokens->[$j] ) {
- $in_match = 1;
- $new_fields[$j] = $current_field;
- $new_matching_patterns[$j] = $current_pattern;
- $current_field = '';
- $current_pattern = '';
- $new_matching_tokens[$j] = $old_rtokens->[$k];
- $new_alignments[$j] = $old_line->get_alignment($k);
- $j++;
- }
- else {
+ # A list will be taken to be a line with a forced break in which all
+ # of the field separators are commas or comma-arrows (except for the
+ # trailing #)
- if ( $old_rtokens->[$k] =~ /^\=\d*$/ ) {
- last if ( $case == 2 ); # avoid problems with stuff
- # like: $a=$b=$c=$d;
- $hid_equals = 1;
+ my $rtokens = $line->get_rtokens();
+ my $test_token = $rtokens->[0];
+ my ( $raw_tok, $lev, $tag, $tok_count ) =
+ decode_alignment_token($test_token);
+ if ( $is_comma_token{$raw_tok} ) {
+ my $list_type = $test_token;
+ my $jmax = $line->get_jmax();
+
+ foreach ( 1 .. $jmax - 2 ) {
+ ( $raw_tok, $lev, $tag, $tok_count ) =
+ decode_alignment_token( $rtokens->[$_] );
+ if ( !$is_comma_token{$raw_tok} ) {
+ $list_type = "";
+ last;
+ }
}
- last
- if ( $in_match && $case == 1 )
- ; # disallow gaps in matching field types in case 1
+ $line->set_list_type($list_type);
}
+ return;
}
+}
- # Modify the current state if we are successful.
- # We must exactly reach the ends of the new list for success, and the old
- # pattern must have more fields. Here is an example where the first and
- # second lines have the same number, and we should not align:
- # my @a = map chr, 0 .. 255;
- # my @b = grep /\W/, @a;
- # my @c = grep /[^\w]/, @a;
-
- # Otherwise, we would get all of the commas aligned, which doesn't work as
- # well:
- # my @a = map chr, 0 .. 255;
- # my @b = grep /\W/, @a;
- # my @c = grep /[^\w]/, @a;
-
- if ( ( $j == $jmax )
- && ( $current_field eq '' )
- && ( $case != 1 || $hid_equals )
- && ( $maximum_field_index > $jmax ) )
- {
- my $k = $maximum_field_index;
- $current_field .= $old_rfields->[$k];
- $current_pattern .= $old_rpatterns->[$k];
- $new_fields[$j] = $current_field;
- $new_matching_patterns[$j] = $current_pattern;
-
- $new_alignments[$j] = $old_line->get_alignment($k);
- $maximum_field_index = $j;
-
- $old_line->set_alignments(@new_alignments);
- $old_line->set_jmax($jmax);
- $old_line->set_rtokens( \@new_matching_tokens );
- $old_line->set_rfields( \@new_fields );
- $old_line->set_rpatterns( \@{$rpatterns} );
- }
+sub fix_terminal_ternary {
- # Dumb Down starting match if necessary:
+ # Add empty fields as necessary to align a ternary term
+ # like this:
#
- # Consider the following two lines:
+ # my $leapyear =
+ # $year % 4 ? 0
+ # : $year % 100 ? 1
+ # : $year % 400 ? 0
+ # : 1;
#
- # {
- # $a => 20 > 3 ? 1 : 0,
- # $xyz => 5,
- # }
-
- # We would like to get alignment regardless of the order of the two lines.
- # If the lines come in in this order, then we will simplify the patterns of
- # the first line in sub eliminate_new_fields. If the lines come in reverse
- # order, then we achieve this with eliminate_new_fields.
-
- # This update is currently restricted to leading '=>' matches. Although we
- # could do this for both '=' and '=>', overall the results for '=' come out
- # better without this step because this step can eliminate some other good
- # matches. For example, with the '=' we get:
-
-# my @disilva = ( "di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva" );
-# my @dsf = map "$_\x{FFFE}Fred", @disilva;
-# my @dsj = map "$_\x{FFFE}John", @disilva;
-# my @dsJ = map "$_ John", @disilva;
-
- # without including '=' we get:
-
-# my @disilva = ( "di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva" );
-# my @dsf = map "$_\x{FFFE}Fred", @disilva;
-# my @dsj = map "$_\x{FFFE}John", @disilva;
-# my @dsJ = map "$_ John", @disilva;
- elsif (
- $case == 2
-
- && @new_matching_tokens == 1
- ##&& $new_matching_tokens[0] =~ /^=/ # see note above
- && $new_matching_tokens[0] =~ /^=>/
- && $maximum_field_index > 2
- )
- {
- my $jmaxm = $jmax - 1;
- my $kmaxm = $maximum_field_index - 1;
- my $have_side_comment = $old_rtokens->[$kmaxm] eq '#';
-
- # We need to reduce the group pattern to be just two tokens,
- # the leading equality or => and the final side comment
-
- my $mid_field = join "",
- @{$old_rfields}[ 1 .. $maximum_field_index - 1 ];
- my $mid_patterns = join "",
- @{$old_rpatterns}[ 1 .. $maximum_field_index - 1 ];
- my @new_alignments = (
- $old_line->get_alignment(0),
- $old_line->get_alignment( $maximum_field_index - 1 )
- );
- my @new_tokens =
- ( $old_rtokens->[0], $old_rtokens->[ $maximum_field_index - 1 ] );
- my @new_fields = (
- $old_rfields->[0], $mid_field, $old_rfields->[$maximum_field_index]
- );
- my @new_patterns = (
- $old_rpatterns->[0], $mid_patterns,
- $old_rpatterns->[$maximum_field_index]
- );
-
- $maximum_field_index = 2;
- $old_line->set_jmax($maximum_field_index);
- $old_line->set_rtokens( \@new_tokens );
- $old_line->set_rfields( \@new_fields );
- $old_line->set_rpatterns( \@new_patterns );
-
- initialize_for_new_group();
- add_to_group($old_line);
- }
- return;
-}
+ # returns the index of the terminal question token, if any
-# create an empty side comment if none exists
-sub make_side_comment {
- my ( $new_line, $level_end ) = @_;
- my $jmax = $new_line->get_jmax();
- my $rtokens = $new_line->get_rtokens();
-
- # if line does not have a side comment...
- if ( ( $jmax == 0 ) || ( $rtokens->[ $jmax - 1 ] ne '#' ) ) {
- my $rfields = $new_line->get_rfields();
- my $rpatterns = $new_line->get_rpatterns();
- $rtokens->[$jmax] = '#';
- $rfields->[ ++$jmax ] = '';
- $rpatterns->[$jmax] = '#';
- $new_line->set_jmax($jmax);
- $new_line->set_jmax_original_line($jmax);
- }
-
- # line has a side comment..
- else {
+ my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths,
+ $group_level )
+ = @_;
- # don't remember old side comment location for very long
- my $line_number = $vertical_aligner_self->get_output_line_number();
- my $rfields = $new_line->get_rfields();
- if (
- $line_number - $last_side_comment_line_number > 12
+ return unless ($old_line);
+ use constant EXPLAIN_TERNARY => 0;
- # and don't remember comment location across block level changes
- || ( $level_end < $last_side_comment_level
- && $rfields->[0] =~ /^}/ )
- )
- {
- forget_side_comment();
- }
- $last_side_comment_line_number = $line_number;
- $last_side_comment_level = $level_end;
+ if (%valign_control_hash) {
+ my $align_ok = $valign_control_hash{'?'};
+ $align_ok = $valign_control_default unless defined($align_ok);
+ return unless ($align_ok);
}
- return;
-}
-sub decide_if_list {
+ my $jmax = @{$rfields} - 1;
+ my $rfields_old = $old_line->get_rfields();
- my $line = shift;
+ my $rpatterns_old = $old_line->get_rpatterns();
+ my $rtokens_old = $old_line->get_rtokens();
+ my $maximum_field_index = $old_line->get_jmax();
- # A list will be taken to be a line with a forced break in which all
- # of the field separators are commas or comma-arrows (except for the
- # trailing #)
+ # look for the question mark after the :
+ my ($jquestion);
+ my $depth_question;
+ my $pad = "";
+ my $pad_length = 0;
+ foreach my $j ( 0 .. $maximum_field_index - 1 ) {
+ my $tok = $rtokens_old->[$j];
+ my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
+ if ( $raw_tok eq '?' ) {
+ $depth_question = $lev;
- # List separator tokens are things like ',3' or '=>2',
- # where the trailing digit is the nesting depth. Allow braces
- # to allow nested list items.
- my $rtokens = $line->get_rtokens();
- my $test_token = $rtokens->[0];
- if ( $test_token =~ /^(\,|=>)/ ) {
- my $list_type = $test_token;
- my $jmax = $line->get_jmax();
+ # depth must be correct
+ next unless ( $depth_question eq $group_level );
- foreach ( 1 .. $jmax - 2 ) {
- if ( $rtokens->[$_] !~ /^(\,|=>|\{)/ ) {
- $list_type = "";
- last;
+ $jquestion = $j;
+ if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
+ $pad_length = length($1);
+ $pad = " " x $pad_length;
+ }
+ else {
+ return; # shouldn't happen
}
+ last;
}
- $line->set_list_type($list_type);
}
- return;
-}
+ return unless ( defined($jquestion) ); # shouldn't happen
-sub eliminate_new_fields {
-
- my ( $new_line, $old_line ) = @_;
- return unless (@group_lines);
- my $jmax = $new_line->get_jmax();
-
- my $old_rtokens = $old_line->get_rtokens();
- my $rtokens = $new_line->get_rtokens();
- my $is_assignment =
- ( $rtokens->[0] =~ /^=>?\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
-
- # must be monotonic variation
- return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );
-
- # must be more fields in the new line
- my $maximum_field_index = $old_line->get_jmax();
- return unless ( $maximum_field_index < $jmax );
-
- unless ($is_assignment) {
- return
- unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen )
- ; # only if monotonic
-
- # never combine fields of a comma list
- return
- unless ( $maximum_field_index > 1 )
- && ( $new_line->get_list_type() !~ /^,/ );
- }
-
- my $rfields = $new_line->get_rfields();
- my $rpatterns = $new_line->get_rpatterns();
- my $old_rpatterns = $old_line->get_rpatterns();
-
- # loop over all OLD tokens except comment and check match
- my $match = 1;
- foreach my $k ( 0 .. $maximum_field_index - 2 ) {
- if ( ( $old_rtokens->[$k] ne $rtokens->[$k] )
- || ( $old_rpatterns->[$k] ne $rpatterns->[$k] ) )
- {
- $match = 0;
- last;
- }
- }
-
- # first tokens agree, so combine extra new tokens
- if ($match) {
- foreach my $k ( $maximum_field_index .. $jmax - 1 ) {
-
- $rfields->[ $maximum_field_index - 1 ] .= $rfields->[$k];
- $rfields->[$k] = "";
- $rpatterns->[ $maximum_field_index - 1 ] .= $rpatterns->[$k];
- $rpatterns->[$k] = "";
- }
-
- $rtokens->[ $maximum_field_index - 1 ] = '#';
- $rfields->[$maximum_field_index] = $rfields->[$jmax];
- $rpatterns->[$maximum_field_index] = $rpatterns->[$jmax];
- $jmax = $maximum_field_index;
- }
- $new_line->set_jmax($jmax);
- return;
-}
-
-sub fix_terminal_ternary {
-
- # Add empty fields as necessary to align a ternary term
- # like this:
- #
- # my $leapyear =
- # $year % 4 ? 0
- # : $year % 100 ? 1
- # : $year % 400 ? 0
- # : 1;
- #
- # returns 1 if the terminal item should be indented
-
- my ( $old_line, $rfields, $rtokens, $rpatterns ) = @_;
- return unless ($old_line);
-
-## FUTURE CODING
-## my ( $old_line, $end_line ) = @_;
-## return unless ( $old_line && $end_line );
-##
-## my $rfields = $end_line->get_rfields();
-## my $rpatterns = $end_line->get_rpatterns();
-## my $rtokens = $end_line->get_rtokens();
-
- my $jmax = @{$rfields} - 1;
- my $rfields_old = $old_line->get_rfields();
-
- my $rpatterns_old = $old_line->get_rpatterns();
- my $rtokens_old = $old_line->get_rtokens();
- my $maximum_field_index = $old_line->get_jmax();
-
- # look for the question mark after the :
- my ($jquestion);
- my $depth_question;
- my $pad = "";
- foreach my $j ( 0 .. $maximum_field_index - 1 ) {
- my $tok = $rtokens_old->[$j];
- if ( $tok =~ /^\?(\d+)$/ ) {
- $depth_question = $1;
-
- # depth must be correct
- next unless ( $depth_question eq $group_level );
-
- $jquestion = $j;
- if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
- $pad = " " x length($1);
- }
- else {
- return; # shouldn't happen
- }
- last;
- }
- }
- return unless ( defined($jquestion) ); # shouldn't happen
-
- # Now splice the tokens and patterns of the previous line
- # into the else line to insure a match. Add empty fields
- # as necessary.
- my $jadd = $jquestion;
+ # Now splice the tokens and patterns of the previous line
+ # into the else line to insure a match. Add empty fields
+ # as necessary.
+ my $jadd = $jquestion;
# Work on copies of the actual arrays in case we have
# to return due to an error
- my @fields = @{$rfields};
- my @patterns = @{$rpatterns};
- my @tokens = @{$rtokens};
+ my @fields = @{$rfields};
+ my @patterns = @{$rpatterns};
+ my @tokens = @{$rtokens};
+ my @field_lengths = @{$rfield_lengths};
- VALIGN_DEBUG_FLAG_TERNARY && do {
+ EXPLAIN_TERNARY && do {
local $" = '><';
print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n";
print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n";
# Note that this padding will remain even if the terminal value goes
# out on a separate line. This does not seem to look to bad, so no
# mechanism has been included to undo it.
- my $field1 = shift @fields;
+ my $field1 = shift @fields;
+ my $field_length1 = shift @field_lengths;
+ my $len_colon = length($colon);
unshift @fields, ( $colon, $pad . $therest );
+ unshift @field_lengths,
+ ( $len_colon, $pad_length + $field_length1 - $len_colon );
# change the leading pattern from : to ?
return unless ( $patterns[0] =~ s/^\:/?/ );
unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
# insert appropriate number of empty fields
- splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
+ splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
+ splice( @field_lengths, 1, 0, (0) x $jadd ) if $jadd;
}
# handle sub-case of first field just equal to leading colon.
$patterns[1] = "?b" . $patterns[1];
# pad the second field
- $fields[1] = $pad . $fields[1];
+ $fields[1] = $pad . $fields[1];
+ $field_lengths[1] = $pad_length + $field_lengths[1];
# install leading tokens and patterns of existing line, replacing
# leading token and inserting appropriate number of empty fields
splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
- splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
+ splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
+ splice( @field_lengths, 1, 0, (0) x $jadd ) if $jadd;
}
}
unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
# insert appropriate number of empty fields
- $jadd = $jquestion + 1;
- $fields[0] = $pad . $fields[0];
- splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
+ $jadd = $jquestion + 1;
+ $fields[0] = $pad . $fields[0];
+ $field_lengths[0] = $pad_length + $field_lengths[0];
+ splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
+ splice( @field_lengths, 0, 0, (0) x $jadd ) if $jadd;
}
- VALIGN_DEBUG_FLAG_TERNARY && do {
+ EXPLAIN_TERNARY && do {
local $" = '><';
print STDOUT "MODIFIED TOKENS=<@tokens>\n";
print STDOUT "MODIFIED PATTERNS=<@patterns>\n";
};
# all ok .. update the arrays
- @{$rfields} = @fields;
- @{$rtokens} = @tokens;
- @{$rpatterns} = @patterns;
-## FUTURE CODING
-## $end_line->set_rfields( \@fields );
-## $end_line->set_rtokens( \@tokens );
-## $end_line->set_rpatterns( \@patterns );
+ @{$rfields} = @fields;
+ @{$rtokens} = @tokens;
+ @{$rpatterns} = @patterns;
+ @{$rfield_lengths} = @field_lengths;
# force a flush after this line
return $jquestion;
#
# returns a positive value if the else block should be indented
#
- my ( $old_line, $rfields, $rtokens, $rpatterns ) = @_;
+ my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths ) = @_;
+
return unless ($old_line);
my $jmax = @{$rfields} - 1;
return unless ( $jmax > 0 );
+ if (%valign_control_hash) {
+ my $align_ok = $valign_control_hash{'{'};
+ $align_ok = $valign_control_default unless defined($align_ok);
+ return unless ($align_ok);
+ }
+
# check for balanced else block following if/elsif/unless
my $rfields_old = $old_line->get_rfields();
my $jadd = $jbrace - $jparen;
splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
- splice( @{$rfields}, 1, 0, ('') x $jadd );
+ splice( @{$rfields}, 1, 0, ('') x $jadd );
+ splice( @{$rfield_lengths}, 1, 0, (0) x $jadd );
# force a flush after this line if it does not follow a case
if ( $rfields_old->[0] =~ /^case\s*$/ ) { return }
else { return $jbrace }
}
-{ # sub check_match
- my %is_good_alignment;
-
- BEGIN {
-
- # Vertically aligning on certain "good" tokens is usually okay
- # so we can be less restrictive in marginal cases.
- my @q = qw( { ? => = );
- push @q, (',');
- @is_good_alignment{@q} = (1) x scalar(@q);
- }
+my %is_closing_block_type;
- sub check_match {
+BEGIN {
+ @_ = qw< } ] >;
+ @is_closing_block_type{@_} = (1) x scalar(@_);
+}
- # See if the current line matches the current vertical alignment group.
- # If not, flush the current group.
- my ( $new_line, $old_line ) = @_;
+sub check_match {
- # uses global variables:
- # $previous_minimum_jmax_seen
- # $maximum_jmax_seen
- # $marginal_match
- my $jmax = $new_line->get_jmax();
- my $maximum_field_index = $old_line->get_jmax();
+ # See if the current line matches the current vertical alignment group.
- # flush if this line has too many fields
- # variable $GoToLoc indicates goto branch point, for debugging
- my $GoToLoc = 1;
- if ( $jmax > $maximum_field_index ) { goto NO_MATCH }
+ my ( $self, $new_line, $base_line, $prev_line ) = @_;
- # flush if adding this line would make a non-monotonic field count
- if (
- ( $maximum_field_index > $jmax ) # this has too few fields
- && (
- ( $previous_minimum_jmax_seen <
- $jmax ) # and wouldn't be monotonic
- || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
- )
- )
- {
- $GoToLoc = 2;
- goto NO_MATCH;
- }
+ # Given:
+ # $new_line = the line being considered for group inclusion
+ # $base_line = the first line of the current group
+ # $prev_line = the line just before $new_line
- # otherwise see if this line matches the current group
- my $jmax_original_line = $new_line->get_jmax_original_line();
- my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
- my $rtokens = $new_line->get_rtokens();
- my $rfields = $new_line->get_rfields();
- my $rpatterns = $new_line->get_rpatterns();
- my $list_type = $new_line->get_list_type();
-
- my $group_list_type = $old_line->get_list_type();
- my $old_rpatterns = $old_line->get_rpatterns();
- my $old_rtokens = $old_line->get_rtokens();
-
- my $jlimit = $jmax - 1;
- if ( $maximum_field_index > $jmax ) {
- $jlimit = $jmax_original_line;
- --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
- }
-
- # handle comma-separated lists ..
- if ( $group_list_type && ( $list_type eq $group_list_type ) ) {
- for my $j ( 0 .. $jlimit ) {
- my $old_tok = $old_rtokens->[$j];
- next unless $old_tok;
- my $new_tok = $rtokens->[$j];
- next unless $new_tok;
-
- # lists always match ...
- # unless they would align any '=>'s with ','s
- $GoToLoc = 3;
- goto NO_MATCH
- if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/
- || $new_tok =~ /^=>/ && $old_tok =~ /^,/ );
- }
- }
-
- # do detailed check for everything else except hanging side comments
- elsif ( !$is_hanging_side_comment ) {
-
- my $leading_space_count = $new_line->get_leading_space_count();
-
- my $max_pad = 0;
- my $min_pad = 0;
- my $saw_good_alignment;
-
- for my $j ( 0 .. $jlimit ) {
-
- my $old_tok = $old_rtokens->[$j];
- my $new_tok = $rtokens->[$j];
-
- # Note on encoding used for alignment tokens:
- # -------------------------------------------
- # Tokens are "decorated" with information which can help
- # prevent unwanted alignments. Consider for example the
- # following two lines:
- # local ( $xn, $xd ) = split( '/', &'rnorm(@_) );
- # local ( $i, $f ) = &'bdiv( $xn, $xd );
- # There are three alignment tokens in each line, a comma,
- # an =, and a comma. In the first line these three tokens
- # are encoded as:
- # ,4+local-18 =3 ,4+split-7
- # and in the second line they are encoded as
- # ,4+local-18 =3 ,4+&'bdiv-8
- # Tokens always at least have token name and nesting
- # depth. So in this example the ='s are at depth 3 and
- # the ,'s are at depth 4. This prevents aligning tokens
- # of different depths. Commas contain additional
- # information, as follows:
- # , {depth} + {container name} - {spaces to opening paren}
- # This allows us to reject matching the rightmost commas
- # in the above two lines, since they are for different
- # function calls. This encoding is done in
- # 'sub send_lines_to_vertical_aligner'.
-
- # Pick off actual token.
- # Everything up to the first digit is the actual token.
- my $alignment_token = $new_tok;
- if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 }
-
- # see if the decorated tokens match
- my $tokens_match = $new_tok eq $old_tok
-
- # Exception for matching terminal : of ternary statement..
- # consider containers prefixed by ? and : a match
- || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ );
-
- # No match if the alignment tokens differ...
- if ( !$tokens_match ) {
-
- # ...Unless this is a side comment
- if (
- $j == $jlimit
-
- # and there is either at least one alignment token
- # or this is a single item following a list. This
- # latter rule is required for 'December' to join
- # the following list:
- # my (@months) = (
- # '', 'January', 'February', 'March',
- # 'April', 'May', 'June', 'July',
- # 'August', 'September', 'October', 'November',
- # 'December'
- # );
- # If it doesn't then the -lp formatting will fail.
- && ( $j > 0 || $old_tok =~ /^,/ )
- )
- {
- $marginal_match = 1
- if ( $marginal_match == 0
- && @group_lines == 1 );
- last;
- }
+ # returns a flag and a value as follows:
+ # return (0, $imax_align) if the line does not match
+ # return (1, $imax_align) if the line matches but does not fit
+ # return (2, $imax_align) if the line matches and fits
- $GoToLoc = 4;
- goto NO_MATCH;
- }
+ # Returns '$imax_align' which is the index of the maximum matching token.
+ # It will be used in the subsequent left-to-right sweep to align as many
+ # tokens as possible for lines which partially match.
+ my $imax_align = -1;
- # Calculate amount of padding required to fit this in.
- # $pad is the number of spaces by which we must increase
- # the current field to squeeze in this field.
- my $pad =
- length( $rfields->[$j] ) - $old_line->current_field_width($j);
- if ( $j == 0 ) { $pad += $leading_space_count; }
-
- # remember max pads to limit marginal cases
- if ( $alignment_token ne '#' ) {
- if ( $pad > $max_pad ) { $max_pad = $pad }
- if ( $pad < $min_pad ) { $min_pad = $pad }
- }
- if ( $is_good_alignment{$alignment_token} ) {
- $saw_good_alignment = 1;
- }
+ # variable $GoToMsg explains reason for no match, for debugging
+ my $GoToMsg = "";
+ use constant EXPLAIN_CHECK_MATCH => 0;
- # If patterns don't match, we have to be careful...
- if ( $old_rpatterns->[$j] ne $rpatterns->[$j] ) {
-
- # flag this as a marginal match since patterns differ
- $marginal_match = 1
- if ( $marginal_match == 0 && @group_lines == 1 );
-
- # We have to be very careful about aligning commas
- # when the pattern's don't match, because it can be
- # worse to create an alignment where none is needed
- # than to omit one. Here's an example where the ','s
- # are not in named containers. The first line below
- # should not match the next two:
- # ( $a, $b ) = ( $b, $r );
- # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
- # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
- if ( $alignment_token eq ',' ) {
-
- # do not align commas unless they are in named containers
- $GoToLoc = 5;
- goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ );
- }
+ # This is a flag for testing alignment by sub sweep_left_to_right only.
+ # This test can help find problems with the alignment logic.
+ # This flag should normally be zero.
+ use constant TEST_SWEEP_ONLY => 0;
- # do not align parens unless patterns match;
- # large ugly spaces can occur in math expressions.
- elsif ( $alignment_token eq '(' ) {
+ my $jmax = $new_line->get_jmax();
+ my $maximum_field_index = $base_line->get_jmax();
- # But we can allow a match if the parens don't
- # require any padding.
- $GoToLoc = 6;
- if ( $pad != 0 ) { goto NO_MATCH }
- }
+ my $jlimit = $jmax - 2;
+ if ( $jmax > $maximum_field_index ) {
+ $jlimit = $maximum_field_index - 2;
+ }
- # Handle an '=' alignment with different patterns to
- # the left.
- elsif ( $alignment_token eq '=' ) {
-
- # It is best to be a little restrictive when
- # aligning '=' tokens. Here is an example of
- # two lines that we will not align:
- # my $variable=6;
- # $bb=4;
- # The problem is that one is a 'my' declaration,
- # and the other isn't, so they're not very similar.
- # We will filter these out by comparing the first
- # letter of the pattern. This is crude, but works
- # well enough.
- if (
- substr( $old_rpatterns->[$j], 0, 1 ) ne
- substr( $rpatterns->[$j], 0, 1 ) )
- {
- $GoToLoc = 7;
- goto NO_MATCH;
- }
+ if ( $new_line->get_is_hanging_side_comment() ) {
- # If we pass that test, we'll call it a marginal match.
- # Here is an example of a marginal match:
- # $done{$$op} = 1;
- # $op = compile_bblock($op);
- # The left tokens are both identifiers, but
- # one accesses a hash and the other doesn't.
- # We'll let this be a tentative match and undo
- # it later if we don't find more than 2 lines
- # in the group.
- elsif ( @group_lines == 1 ) {
- $marginal_match =
- 2; # =2 prevents being undone below
- }
- }
- }
+ # HSC's can join the group if they fit
+ }
- # Don't let line with fewer fields increase column widths
- # ( align3.t )
- if ( $maximum_field_index > $jmax ) {
+ # Everything else
+ else {
- # Exception: suspend this rule to allow last lines to join
- $GoToLoc = 8;
- if ( $pad > 0 ) { goto NO_MATCH; }
- }
- } ## end for my $j ( 0 .. $jlimit)
-
- # Turn off the "marginal match" flag in some cases...
- # A "marginal match" occurs when the alignment tokens agree
- # but there are differences in the other tokens (patterns).
- # If we leave the marginal match flag set, then the rule is that we
- # will align only if there are more than two lines in the group.
- # We will turn of the flag if we almost have a match
- # and either we have seen a good alignment token or we
- # just need a small pad (2 spaces) to fit. These rules are
- # the result of experimentation. Tokens which misaligned by just
- # one or two characters are annoying. On the other hand,
- # large gaps to less important alignment tokens are also annoying.
- if ( $marginal_match == 1
- && $jmax == $maximum_field_index
- && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) )
- )
- {
- $marginal_match = 0;
- }
- ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n";
+ # A group with hanging side comments ends with the first non hanging
+ # side comment.
+ if ( $base_line->get_is_hanging_side_comment() ) {
+ $GoToMsg = "end of hanging side comments";
+ goto NO_MATCH;
}
- # We have a match (even if marginal).
- # If the current line has fewer fields than the current group
- # but otherwise matches, copy the remaining group fields to
- # make it a perfect match.
- if ( $maximum_field_index > $jmax ) {
-
- ##########################################################
- # FIXME: The previous version had a bug which made side comments
- # become regular fields, so for now the program does not allow a
- # line with side comment to match. This should eventually be done.
- # The best test file for experimenting is 'lista.t'
- ##########################################################
-
- my $comment = $rfields->[$jmax];
- $GoToLoc = 9;
- goto NO_MATCH if ($comment);
-
- # Corrected loop
- for my $jj ( $jlimit .. $maximum_field_index ) {
- $rtokens->[$jj] = $old_rtokens->[$jj];
- $rfields->[ $jj + 1 ] = '';
- $rpatterns->[ $jj + 1 ] = $old_rpatterns->[ $jj + 1 ];
- }
-
-## THESE DO NOT GIVE CORRECT RESULTS
-## $rfields->[$jmax] = $comment;
-## $new_line->set_jmax($jmax);
+ # The number of tokens that this line shares with the previous line
+ # has been stored with the previous line. This value was calculated
+ # and stored by sub 'match_line_pair'.
+ $imax_align = $prev_line->get_imax_pair();
+ if ( $imax_align != $jlimit ) {
+ $GoToMsg = "Not all tokens match: $imax_align != $jlimit\n";
+ goto NO_MATCH;
}
- return;
- NO_MATCH:
+ }
+
+ # The tokens match, but the lines must have identical number of
+ # tokens to join the group.
+ if ( $maximum_field_index != $jmax ) {
+ $GoToMsg = "token count differs";
+ goto NO_MATCH;
+ }
- # variable $GoToLoc is for debugging
- #print "no match from $GoToLoc\n";
+ # The tokens match. Now See if there is space for this line in the
+ # current group.
+ if ( $self->check_fit( $new_line, $base_line ) && !TEST_SWEEP_ONLY ) {
- # Make one last effort to retain a match of certain statements
- my $match = salvage_equality_matches( $new_line, $old_line );
- my_flush_code() unless ($match);
- return;
+ EXPLAIN_CHECK_MATCH
+ && print "match and fit, imax_align=$imax_align, jmax=$jmax\n";
+ return ( 2, $jlimit );
}
-}
+ else {
-sub salvage_equality_matches {
- my ( $new_line, $old_line ) = @_;
+ EXPLAIN_CHECK_MATCH
+ && print "match but no fit, imax_align=$imax_align, jmax=$jmax\n";
+ return ( 1, $jlimit );
+ }
- # Reduce the complexity of the two lines if it will allow us to retain
- # alignment of some common alignments, including '=' and '=>'. We will
- # convert both lines to have just two matching tokens, the equality and the
- # side comment.
-
- # return 0 or undef if unsuccessful
- # return 1 if successful
-
- # Here is a very simple example of two lines where we could at least
- # align the equals:
- # $x = $class->_sub( $x, $delta );
- # $xpownm1 = $class->_pow( $class->_copy($x), $nm1 ); # x(i)^(n-1)
-
- # We will only do this if there is one old line (and one new line)
- return unless ( @group_lines == 1 );
- return if ($is_matching_terminal_line);
-
- # We are only looking for equality type statements
- my $old_rtokens = $old_line->get_rtokens();
- my $rtokens = $new_line->get_rtokens();
- my $is_equals =
- ( $rtokens->[0] =~ /=/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
- return unless ($is_equals);
-
- # The leading patterns must match
- my $old_rpatterns = $old_line->get_rpatterns();
- my $rpatterns = $new_line->get_rpatterns();
- return if ( $old_rpatterns->[0] ne $rpatterns->[0] );
-
- # Both should have side comment fields (should always be true)
- my $jmax_old = $old_line->get_jmax();
- my $jmax_new = $new_line->get_jmax();
- my $end_tok_old = $old_rtokens->[ $jmax_old - 1 ];
- my $end_tok_new = $rtokens->[ $jmax_new - 1 ];
- my $have_side_comments =
- defined($end_tok_old)
- && $end_tok_old eq '#'
- && defined($end_tok_new)
- && $end_tok_new eq '#';
- if ( !$have_side_comments ) { return; }
-
- # Do not match if any remaining tokens in new line include '?', 'if',
- # 'unless','||', '&&'. The reason is that (1) this isn't a great match, and
- # (2) we will prevent possibly better matchs to follow. Here is an
- # example. The match of the first two lines is rejected, and this allows
- # the second and third lines to match.
- # my $type = shift || "o";
- # my $fname = ( $type eq 'oo' ? 'orte_city' : 'orte' );
- # my $suffix = ( $coord_system eq 'standard' ? '' : '-orig' );
- # This logic can cause some unwanted losses of alignments, but it can retain
- # long runs of multiple-token alignments, so overall it is worthwhile.
- # If we had a peek at the subsequent line we could make a much better
- # decision here, but for now this is not available.
- for ( my $j = 1 ; $j < $jmax_new - 1 ; $j++ ) {
- my $new_tok = $rtokens->[$j];
-
- # git#16: do not consider fat commas as good aligmnents here
- my $is_good_alignment =
- ( $new_tok =~ /^(=|\?|if|unless|\|\||\&\&)/ && $new_tok !~ /^=>/ );
- return if ($is_good_alignment);
- }
-
- my $squeeze_line = sub {
- my ($line_obj) = @_;
-
- # reduce a line down to the three fields surrounding
- # the two tokens, an '=' of some sort and a '#' at the end
-
- my $jmax = $line_obj->get_jmax();
- my $jmax_new = 2;
- return unless $jmax > $jmax_new;
- my $rfields = $line_obj->get_rfields();
- my $rpatterns = $line_obj->get_rpatterns();
- my $rtokens = $line_obj->get_rtokens();
- my $rfields_new = [
- $rfields->[0], join( '', @{$rfields}[ 1 .. $jmax - 1 ] ),
- $rfields->[$jmax]
- ];
- my $rpatterns_new = [
- $rpatterns->[0], join( '', @{$rpatterns}[ 1 .. $jmax - 1 ] ),
- $rpatterns->[$jmax]
- ];
- my $rtokens_new = [ $rtokens->[0], $rtokens->[ $jmax - 1 ] ];
- $line_obj->{_rfields} = $rfields_new;
- $line_obj->{_rpatterns} = $rpatterns_new;
- $line_obj->{_rtokens} = $rtokens_new;
- $line_obj->set_jmax($jmax_new);
- };
+ NO_MATCH:
- # Okay, we will force a match at the equals-like token. We will fix both
- # lines to have just 2 tokens and 3 fields:
- $squeeze_line->($new_line);
- $squeeze_line->($old_line);
+ EXPLAIN_CHECK_MATCH
+ && print
+ "no match because $GoToMsg, max match index =i $imax_align, jmax=$jmax\n";
- # start over with a new group
- initialize_for_new_group();
- add_to_group($old_line);
- return 1;
+ return ( 0, $imax_align );
}
sub check_fit {
- my ( $new_line, $old_line ) = @_;
- return unless (@group_lines);
+ my ( $self, $new_line, $old_line ) = @_;
- my $jmax = $new_line->get_jmax();
- my $leading_space_count = $new_line->get_leading_space_count();
- my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
- my $rtokens = $new_line->get_rtokens();
- my $rfields = $new_line->get_rfields();
- my $rpatterns = $new_line->get_rpatterns();
+ # The new line has alignments identical to the current group. Now we have
+ # to fit the new line into the group without causing a field to exceed the
+ # line length limit.
+ # return true if successful
+ # return false if not successful
- my $group_list_type = $group_lines[0]->get_list_type();
+ my $jmax = $new_line->get_jmax();
+ my $leading_space_count = $new_line->get_leading_space_count();
+ my $rfield_lengths = $new_line->get_rfield_lengths();
+ my $padding_available = $old_line->get_available_space_on_right();
+ my $jmax_old = $old_line->get_jmax();
+ my $rtokens_old = $old_line->get_rtokens();
+
+ # Safety check ... only lines with equal array sizes should arrive here
+ # from sub check_match. So if this error occurs, look at recent changes in
+ # sub check_match. It is only supposed to check the fit of lines with
+ # identical numbers of alignment tokens.
+ if ( $jmax_old ne $jmax ) {
+
+ warning(<<EOM);
+Program bug detected in Perl::Tidy::VerticalAligner sub check_fit
+unexpected difference in array lengths: $jmax != $jmax_old
+EOM
+ return;
+ }
- my $padding_so_far = 0;
- my $padding_available = $old_line->get_available_space_on_right();
+ # Save current columns in case this line does not fit.
+ my @alignments = $old_line->get_alignments();
+ foreach my $alignment (@alignments) {
+ $alignment->save_column();
+ }
- # save current columns in case this doesn't work
- save_alignment_columns();
+ my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
+ # Loop over all alignments ...
my $maximum_field_index = $old_line->get_jmax();
for my $j ( 0 .. $jmax ) {
- my $pad = length( $rfields->[$j] ) - $old_line->current_field_width($j);
+ my $pad = $rfield_lengths->[$j] - $old_line->current_field_width($j);
if ( $j == 0 ) {
$pad += $leading_space_count;
}
- # remember largest gap of the group, excluding gap to side comment
- if ( $pad < 0
- && $group_maximum_gap < -$pad
- && $j > 0
- && $j < $jmax - 1 )
- {
- $group_maximum_gap = -$pad;
- }
-
- next if $pad < 0;
-
- ## OLD NOTES:
- ## This patch helps sometimes, but it doesn't check to see if
- ## the line is too long even without the side comment. It needs
- ## to be reworked.
- ##don't let a long token with no trailing side comment push
- ##side comments out, or end a group. (sidecmt1.t)
- ##next if ($j==$jmax-1 && length($rfields->[$jmax])==0);
-
- # BEGIN PATCH for keith1.txt.
- # If the group began matching multiple tokens but later this got
- # reduced to a fewer number of matching tokens, then the fields
- # of the later lines will still have to fit into their corresponding
- # fields. So a large later field will "push" the other fields to
- # the right, including previous side comments, and if there is no room
- # then there is no match.
- # For example, look at the last line in the following snippet:
-
- # my $b_prod_db = ( $ENV{ORACLE_SID} =~ m/p$/ && !$testing ) ? true : false;
- # my $env = ($b_prod_db) ? "prd" : "val";
- # my $plant = ( $OPT{p} ) ? $OPT{p} : "STL";
- # my $task = $OPT{t};
- # my $fnam = "longggggggggggggggg.$record_created.$env.$plant.idash";
-
- # The long term will push the '?' to the right to fit in, and in this
- # case there is not enough room so it will not match the equals unless
- # we do something special.
-
- # Usually it looks good to keep an initial alignment of '=' going, and
- # we can do this if the long term can fit in the space taken up by the
- # remaining fields (the ? : fields here).
+ # Keep going if this field does not need any space.
+ next if ( $pad < 0 );
- # Allowing any matching token for now, but it could be restricted
- # to an '='-like token if necessary.
+ # Revert to the starting state if does not fit
+ if ( $pad > $padding_available ) {
- if (
- $pad > $padding_available
- && $jmax == 2 # matching one thing (plus #)
- && $j == $jmax - 1 # at last field
- && @group_lines > 1 # more than 1 line in group now
- && $jmax < $maximum_field_index # other lines have more fields
- && length( $rfields->[$jmax] ) == 0 # no side comment
-
- # Uncomment to match only equals (but this does not seem necessary)
- # && $rtokens->[0] =~ /^=\d/ # matching an equals
- )
- {
- my $extra_padding = 0;
- foreach my $jj ( $j + 1 .. $maximum_field_index - 1 ) {
- $extra_padding += $old_line->current_field_width($jj);
+ ################################################
+ # Line does not fit -- revert to starting state
+ ################################################
+ foreach my $alignment (@alignments) {
+ $alignment->restore_column();
}
-
- next if ( $pad <= $padding_available + $extra_padding );
- }
-
- # END PATCH for keith1.pl
-
- # This line will need space; lets see if we want to accept it..
- if (
-
- # not if this won't fit
- ( $pad > $padding_available )
-
- # previously, there were upper bounds placed on padding here
- # (maximum_whitespace_columns), but they were not really helpful
-
- )
- {
-
- # revert to starting state then flush; things didn't work out
- restore_alignment_columns();
- my_flush_code();
- last;
+ return;
}
- # patch to avoid excessive gaps in previous lines,
- # due to a line of fewer fields.
- # return join( ".",
- # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"},
- # $self->{"area"}, $self->{"id"}, $self->{"sel"} );
- next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
-
- # looks ok, squeeze this field in
+ # make room for this field
$old_line->increase_field_width( $j, $pad );
$padding_available -= $pad;
-
- # remember largest gap of the group, excluding gap to side comment
- if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) {
- $group_maximum_gap = $pad;
- }
}
- return;
-}
-
-sub add_to_group {
- # The current line either starts a new alignment group or is
- # accepted into the current alignment group.
- my ($new_line) = @_;
- push_group_line($new_line);
+ ######################################
+ # The line fits, the match is accepted
+ ######################################
+ return 1;
- # initialize field lengths if starting new group
- if ( @group_lines == 1 ) {
+}
- my $jmax = $new_line->get_jmax();
- my $rfields = $new_line->get_rfields();
- my $rtokens = $new_line->get_rtokens();
- my $col = $new_line->get_leading_space_count();
+sub install_new_alignments {
- for my $j ( 0 .. $jmax ) {
- $col += length( $rfields->[$j] );
+ my ($new_line) = @_;
- # create initial alignments for the new group
- my $token = "";
- if ( $j < $jmax ) { $token = $rtokens->[$j] }
- my $alignment = make_alignment( $col, $token );
- $new_line->set_alignment( $j, $alignment );
- }
+ my $jmax = $new_line->get_jmax();
+ my $rfield_lengths = $new_line->get_rfield_lengths();
+ my $col = $new_line->get_leading_space_count();
- $maximum_jmax_seen = $jmax;
- $minimum_jmax_seen = $jmax;
- }
+ for my $j ( 0 .. $jmax ) {
+ $col += $rfield_lengths->[$j];
- # use previous alignments otherwise
- else {
- my @new_alignments = $group_lines[-2]->get_alignments();
- $new_line->set_alignments(@new_alignments);
+ # create initial alignments for the new group
+ my $alignment =
+ Perl::Tidy::VerticalAligner::Alignment->new( { column => $col } );
+ $new_line->set_alignment( $j, $alignment );
}
+ return;
+}
- # remember group jmax extremes for next call to valign_input
- $previous_minimum_jmax_seen = $minimum_jmax_seen;
- $previous_maximum_jmax_seen = $maximum_jmax_seen;
+sub copy_old_alignments {
+ my ( $new_line, $old_line ) = @_;
+ my @new_alignments = $old_line->get_alignments();
+ $new_line->set_alignments(@new_alignments);
return;
}
return;
}
-# flush() sends the current Perl::Tidy::VerticalAligner group down the
-# pipeline to Perl::Tidy::FileWriter.
-
-# This is the external flush, which also empties the buffer and cache
-sub flush {
-
- # the buffer must be emptied first, then any cached text
- dump_valign_buffer();
-
- if (@group_lines) {
- my_flush();
- }
- else {
- if ($cached_line_type) {
- $seqno_string = $cached_seqno_string;
- valign_output_step_C( $cached_line_text,
- $cached_line_leading_space_count,
- $last_level_written );
- $cached_line_type = 0;
- $cached_line_text = "";
- $cached_seqno_string = "";
- }
- }
- return;
-}
-
-sub reduce_valign_buffer_indentation {
-
- my ($diff) = @_;
- if ( $valign_buffer_filling && $diff ) {
- my $max_valign_buffer = @valign_buffer;
- foreach my $i ( 0 .. $max_valign_buffer - 1 ) {
- my ( $line, $leading_space_count, $level ) =
- @{ $valign_buffer[$i] };
- my $ws = substr( $line, 0, $diff );
- if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
- $line = substr( $line, $diff );
- }
- if ( $leading_space_count >= $diff ) {
- $leading_space_count -= $diff;
- $level = level_change( $leading_space_count, $diff, $level );
- }
- $valign_buffer[$i] = [ $line, $leading_space_count, $level ];
- }
- }
- return;
-}
-
sub level_change {
# compute decrease in level when we remove $diff spaces from the
# leading spaces
- my ( $leading_space_count, $diff, $level ) = @_;
+ my ( $self, $leading_space_count, $diff, $level ) = @_;
+
+ my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
if ($rOpts_indent_columns) {
my $olev =
int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
return $level;
}
-sub dump_valign_buffer {
- if (@valign_buffer) {
- foreach (@valign_buffer) {
- valign_output_step_D( @{$_} );
- }
- @valign_buffer = ();
- }
- $valign_buffer_filling = "";
- return;
-}
+###############################################
+# CODE SECTION 4: Code to process comment lines
+###############################################
-sub my_flush_comment {
+sub _flush_comment_lines {
- # Output a group of COMMENT lines
+ # Output a group consisting of COMMENT lines
- return unless (@group_lines);
- my $leading_space_count = $comment_leading_space_count;
- my $leading_string = get_leading_string($leading_space_count);
+ my ($self) = @_;
+ my $rgroup_lines = $self->[_rgroup_lines_];
+ return unless ( @{$rgroup_lines} );
+ my $group_level = $self->[_group_level_];
+ my $group_maximum_line_length = $self->[_group_maximum_line_length_];
+ my $leading_space_count = $self->[_comment_leading_space_count_];
+ my $leading_string =
+ $self->get_leading_string( $leading_space_count, $group_level );
# look for excessively long lines
my $max_excess = 0;
- foreach my $str (@group_lines) {
+ foreach my $item ( @{$rgroup_lines} ) {
+ my ( $str, $str_len ) = @{$item};
my $excess =
- length($str) +
- $leading_space_count -
- maximum_line_length_for_level($group_level);
+ $str_len + $leading_space_count - $group_maximum_line_length;
if ( $excess > $max_excess ) {
$max_excess = $excess;
}
if ( $max_excess > 0 ) {
$leading_space_count -= $max_excess;
if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
- $last_outdented_line_at = $file_writer_object->get_output_line_number();
+ my $file_writer_object = $self->[_file_writer_object_];
+ my $last_outdented_line_at =
+ $file_writer_object->get_output_line_number();
+ my $nlines = @{$rgroup_lines};
+ $self->[_last_outdented_line_at_] =
+ $last_outdented_line_at + $nlines - 1;
+ my $outdented_line_count = $self->[_outdented_line_count_];
unless ($outdented_line_count) {
- $first_outdented_line_at = $last_outdented_line_at;
+ $self->[_first_outdented_line_at_] = $last_outdented_line_at;
}
- my $nlines = @group_lines;
$outdented_line_count += $nlines;
+ $self->[_outdented_line_count_] = $outdented_line_count;
}
# write the lines
my $outdent_long_lines = 0;
- foreach my $line (@group_lines) {
- valign_output_step_B( $leading_space_count, $line, 0,
- $outdent_long_lines, "", $group_level );
+
+ foreach my $item ( @{$rgroup_lines} ) {
+ my ( $str, $str_len, $Kend ) = @{$item};
+ $self->valign_output_step_B(
+ {
+ leading_space_count => $leading_space_count,
+ line => $str,
+ line_length => $str_len,
+ side_comment_length => 0,
+ outdent_long_lines => $outdent_long_lines,
+ rvertical_tightness_flags => undef,
+ level => $group_level,
+ level_end => $group_level,
+ Kend => $Kend,
+ maximum_line_length => $group_maximum_line_length,
+ }
+ );
}
- initialize_for_new_group();
+ $self->initialize_for_new_group();
return;
}
-sub my_flush_code {
+######################################################
+# CODE SECTION 5: Code to process groups of code lines
+######################################################
+
+sub _flush_group_lines {
+
+ # This is the vertical aligner internal flush, which leaves the cache
+ # intact
+ my ( $self, $level_jump ) = @_;
- # Output a group of CODE lines
+ # $level_jump = $next_level-$group_level, if known
+ # = undef if not known
+ # Note: only the sign of the jump is needed
- return unless (@group_lines);
+ my $rgroup_lines = $self->[_rgroup_lines_];
+ return unless ( @{$rgroup_lines} );
+ my $group_type = $self->[_group_type_];
+ my $group_level = $self->[_group_level_];
- VALIGN_DEBUG_FLAG_APPEND0
- && do {
- my $group_list_type = $group_lines[0]->get_list_type();
+ # Debug
+ 0 && do {
my ( $a, $b, $c ) = caller();
- my $nlines = @group_lines;
- my $maximum_field_index = $group_lines[0]->get_jmax();
- my $rfields_old = $group_lines[0]->get_rfields();
- my $tok = $rfields_old->[0];
+ my $nlines = @{$rgroup_lines};
print STDOUT
-"APPEND0: my_flush_code called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$nlines extra=$extra_indent_ok first tok=$tok;\n";
-
- };
+"APPEND0: _flush_group_lines called from $a $b $c lines=$nlines, type=$group_type \n";
+ };
- # some small groups are best left unaligned
- my $do_not_align = decide_if_aligned_pair();
+ ############################################
+ # Section 1: Handle a group of COMMENT lines
+ ############################################
+ if ( $group_type eq 'COMMENT' ) {
+ $self->_flush_comment_lines();
+ return;
+ }
- # optimize side comment location
- $do_not_align = adjust_side_comment($do_not_align);
+ #########################################################################
+ # Section 2: Handle line(s) of CODE. Most of the actual work of vertical
+ # aligning happens here in the following steps:
+ #########################################################################
- # recover spaces for -lp option if possible
- my $extra_leading_spaces = get_extra_leading_spaces();
+ # STEP 1: Remove most unmatched tokens. They block good alignments.
+ my ( $max_lev_diff, $saw_side_comment ) =
+ delete_unmatched_tokens( $rgroup_lines, $group_level );
- # all lines of this group have the same basic leading spacing
- my $group_leader_length = $group_lines[0]->get_leading_space_count();
+ # STEP 2: Sweep top to bottom, forming subgroups of lines with exactly
+ # matching common alignments. The indexes of these subgroups are in the
+ # return variable.
+ my $rgroups = $self->sweep_top_down( $rgroup_lines, $group_level );
- # add extra leading spaces if helpful
- # NOTE: Use zero; this did not work well
- my $min_ci_gap = 0;
+ # STEP 3: Sweep left to right through the lines, looking for leading
+ # alignment tokens shared by groups.
+ sweep_left_to_right( $rgroup_lines, $rgroups, $group_level )
+ if ( @{$rgroups} > 1 );
- # output the lines
- foreach my $line (@group_lines) {
- valign_output_step_A( $line, $min_ci_gap, $do_not_align,
- $group_leader_length, $extra_leading_spaces );
+ # STEP 4: Move side comments to a common column if possible.
+ if ($saw_side_comment) {
+ $self->align_side_comments( $rgroup_lines, $rgroups );
}
- initialize_for_new_group();
- return;
-}
+ # STEP 5: For the -lp option, increase the indentation of lists
+ # to the desired amount, but do not exceed the line length limit.
-sub my_flush {
+ # We are allowed to shift a group of lines to the right if:
+ # (1) its level is greater than the level of the previous group, and
+ # (2) its level is greater than the level of the next line to be written.
- # This is the vertical aligner internal flush, which leaves the cache
- # intact
- return unless (@group_lines);
+ my $extra_indent_ok;
+ if ( $group_level > $self->[_last_level_written_] ) {
- VALIGN_DEBUG_FLAG_APPEND0 && do {
- my ( $a, $b, $c ) = caller();
- my $nlines = @group_lines;
- print STDOUT
-"APPEND0: my_flush called from $a $b $c lines=$nlines, type=$group_type \n";
- };
+ # Use the level jump to next line to come, if given
+ if ( defined($level_jump) ) {
+ $extra_indent_ok = $level_jump < 0;
+ }
- # handle a group of COMMENT lines
- if ( $group_type eq 'COMMENT' ) { my_flush_comment() }
+ # Otherwise, assume the next line has the level of the end of last line.
+ # This fixes case c008.
+ else {
+ my $level_end = $rgroup_lines->[-1]->get_level_end();
+ $extra_indent_ok = $group_level > $level_end;
+ }
+ }
- # handle a single line of CODE
- elsif ( @group_lines == 1 ) { my_flush_code() }
+ my $extra_leading_spaces =
+ $extra_indent_ok
+ ? get_extra_leading_spaces( $rgroup_lines, $rgroups )
+ : 0;
- # handle group(s) of CODE lines
- else {
+ # STEP 6: Output the lines.
+ # All lines in this group have the same leading spacing and maximum line
+ # length
+ my $group_leader_length = $rgroup_lines->[0]->get_leading_space_count();
+ my $group_maximum_line_length =
+ $rgroup_lines->[0]->get_maximum_line_length();
- # LP FIX PART 1
- # If we are trying to add extra indentation for -lp formatting,
- # then we need to try to keep the group intact. But we have
- # to set the $extra_indent_ok flag to zero in case some lines
- # are output separately. We fix things up at the bottom.
- # NOTE: this is a workaround but is tentative; we should really look to
- # see if if extra indentation is possible.
- my $rOpt_lp = $rOpts->{'line-up-parentheses'};
- my $keep_group_intact = $rOpt_lp && $extra_indent_ok;
- my $extra_indent_ok_save = $extra_indent_ok;
- $extra_indent_ok = 0;
+ foreach my $line ( @{$rgroup_lines} ) {
+ $self->valign_output_step_A(
+ {
+ line => $line,
+ min_ci_gap => 0,
+ do_not_align => 0,
+ group_leader_length => $group_leader_length,
+ extra_leading_spaces => $extra_leading_spaces,
+ level => $group_level,
+ maximum_line_length => $group_maximum_line_length,
+ }
+ );
+ }
- # we will rebuild alignment line group(s);
- my @new_lines = @group_lines;
- initialize_for_new_group();
+ # Let the formatter know that this object has been processed and any
+ # recoverable spaces have been handled. This is needed for setting the
+ # closing paren location in -lp mode.
+ my $object = $rgroup_lines->[0]->get_indentation();
+ if ( ref($object) ) { $object->set_recoverable_spaces(0) }
- # remove unmatched tokens in all lines
- delete_unmatched_tokens( \@new_lines );
+ $self->initialize_for_new_group();
+ return;
+}
- foreach my $new_line (@new_lines) {
+{ ## closure for sub sweep_top_down
- # Start a new group if necessary
- if ( !@group_lines ) {
- add_to_group($new_line);
+ my $rall_lines; # all of the lines
+ my $grp_level; # level of all lines
+ my $rgroups; # describes the partition of lines we will make here
+ my $group_line_count; # number of lines in current partition
- next;
- }
+ BEGIN { $rgroups = [] }
- my $j_terminal_match = $new_line->get_j_terminal_match();
- my $base_line = $group_lines[0];
+ sub initialize_for_new_rgroup {
+ $group_line_count = 0;
+ return;
+ }
- # Initialize a global flag saying if the last line of the group
- # should match end of group and also terminate the group. There
- # should be no returns between here and where the flag is handled
- # at the bottom.
- my $col_matching_terminal = 0;
- if ( defined($j_terminal_match) ) {
+ sub add_to_rgroup {
- # remember the column of the terminal ? or { to match with
+ my ($jend) = @_;
+ my $rline = $rall_lines->[$jend];
+
+ my $jbeg = $jend;
+ if ( $group_line_count == 0 ) {
+ install_new_alignments($rline);
+ }
+ else {
+ my $rvals = pop @{$rgroups};
+ $jbeg = $rvals->[0];
+ copy_old_alignments( $rline, $rall_lines->[$jbeg] );
+ }
+ push @{$rgroups}, [ $jbeg, $jend, undef ];
+ $group_line_count++;
+ return;
+ }
+
+ sub get_rgroup_jrange {
+
+ return unless @{$rgroups};
+ return unless ( $group_line_count > 0 );
+ my ( $jbeg, $jend ) = @{ $rgroups->[-1] };
+ return ( $jbeg, $jend );
+ }
+
+ sub end_rgroup {
+
+ my ($imax_align) = @_;
+ return unless @{$rgroups};
+ return unless ( $group_line_count > 0 );
+
+ my ( $jbeg, $jend ) = @{ pop @{$rgroups} };
+ push @{$rgroups}, [ $jbeg, $jend, $imax_align ];
+
+ # Undo some alignments of poor two-line combinations.
+ # We had to wait until now to know the line count.
+ if ( $jend - $jbeg == 1 ) {
+ my $line_0 = $rall_lines->[$jbeg];
+ my $line_1 = $rall_lines->[$jend];
+
+ my $imax_pair = $line_1->get_imax_pair();
+ if ( $imax_pair > $imax_align ) { $imax_align = $imax_pair }
+
+ ## flag for possible future use:
+ ## my $is_isolated_pair = $imax_pair < 0
+ ## && ( $jbeg == 0
+ ## || $rall_lines->[ $jbeg - 1 ]->get_imax_pair() < 0 );
+
+ my $imax_prev =
+ $jbeg > 0 ? $rall_lines->[ $jbeg - 1 ]->get_imax_pair() : -1;
+
+ my ( $is_marginal, $imax_align_fix ) =
+ is_marginal_match( $line_0, $line_1, $grp_level, $imax_align,
+ $imax_prev );
+ if ($is_marginal) {
+ combine_fields( $line_0, $line_1, $imax_align_fix );
+ }
+ }
+
+ initialize_for_new_rgroup();
+ return;
+ }
+
+ sub block_penultimate_match {
+
+ # emergency reset to prevent sweep_left_to_right from trying to match a
+ # failed terminal else match
+ return unless @{$rgroups} > 1;
+ $rgroups->[-2]->[2] = -1;
+ return;
+ }
+
+ sub sweep_top_down {
+ my ( $self, $rlines, $group_level ) = @_;
+
+ # Partition the set of lines into final alignment subgroups
+ # and store the alignments with the lines.
+
+ # The alignment subgroups we are making here are groups of consecutive
+ # lines which have (1) identical alignment tokens and (2) do not
+ # exceed the allowable maximum line length. A later sweep from
+ # left-to-right ('sweep_lr') will handle additional alignments.
+
+ # transfer args to closure variables
+ $rall_lines = $rlines;
+ $grp_level = $group_level;
+ $rgroups = [];
+ initialize_for_new_rgroup();
+ return unless @{$rlines}; # shouldn't happen
+
+ # Unset the _end_group flag for the last line if it it set because it
+ # is not needed and can causes problems for -lp formatting
+ $rall_lines->[-1]->set_end_group(0);
+
+ # Loop over all lines ...
+ my $jline = -1;
+ foreach my $new_line ( @{$rall_lines} ) {
+ $jline++;
+
+ # Start a new subgroup if necessary
+ if ( !$group_line_count ) {
+ add_to_rgroup($jline);
+ if ( $new_line->get_end_group() ) {
+ end_rgroup(-1);
+ }
+ next;
+ }
+
+ my $j_terminal_match = $new_line->get_j_terminal_match();
+ my ( $jbeg, $jend ) = get_rgroup_jrange();
+ if ( !defined($jbeg) ) {
+
+ # safety check, shouldn't happen
+ warning(<<EOM);
+Program bug detected in Perl::Tidy::VerticalAligner sub sweep_top_down
+undefined index for group line count $group_line_count
+EOM
+ $jbeg = $jline;
+ }
+ my $base_line = $rall_lines->[$jbeg];
+
+ # Initialize a global flag saying if the last line of the group
+ # should match end of group and also terminate the group. There
+ # should be no returns between here and where the flag is handled
+ # at the bottom.
+ my $col_matching_terminal = 0;
+ if ( defined($j_terminal_match) ) {
+
+ # remember the column of the terminal ? or { to match with
$col_matching_terminal =
$base_line->get_column($j_terminal_match);
- # set global flag for sub decide_if_aligned_pair
- $is_matching_terminal_line = 1;
+ # Ignore an undefined value as a defensive step; shouldn't
+ # normally happen.
+ $col_matching_terminal = 0
+ unless defined($col_matching_terminal);
}
# -------------------------------------------------------------
- # Allow hanging side comment to join current group, if any. This
- # will help keep side comments aligned, because otherwise we
- # will have to start a new group, making alignment less likely.
+ # Allow hanging side comment to join current group, if any. The
+ # only advantage is to keep the other tokens in the same group. For
+ # example, this would make the '=' align here:
+ # $ax = 1; # side comment
+ # # hanging side comment
+ # $boondoggle = 5; # side comment
+ # $beetle = 5; # side comment
+
+ # here is another example..
+
+ # _rtoc_name_count => {}, # hash to track ..
+ # _rpackage_stack => [], # stack to check ..
+ # # name changes
+ # _rlast_level => \$last_level, # brace indentation
+ #
+ #
+ # If this were not desired, the next step could be skipped.
# -------------------------------------------------------------
-
if ( $new_line->get_is_hanging_side_comment() ) {
join_hanging_comment( $new_line, $base_line );
}
# BEFORE this line unless both it and the previous line have side
# comments. This prevents this line from pushing side coments out
# to the right.
- elsif ( $new_line->get_jmax() == 1 && !$keep_group_intact ) {
+ elsif ( $new_line->get_jmax() == 1 ) {
# There are no matching tokens, so now check side comments.
# Programming note: accessing arrays with index -1 is
# risky in Perl, but we have verified there is at least one
# line in the group and that there is at least one field.
- my $prev_comment = $group_lines[-1]->get_rfields()->[-1];
+ my $prev_comment =
+ $rall_lines->[ $jline - 1 ]->get_rfields()->[-1];
my $side_comment = $new_line->get_rfields()->[-1];
- my_flush_code() unless ( $side_comment && $prev_comment );
-
+ end_rgroup(-1) unless ( $side_comment && $prev_comment );
}
- # -------------------------------------------------------------
- # If there is just one previous line, and it has more fields
- # than the new line, try to join fields together to get a match
- # with the new line. At the present time, only a single
- # leading '=' is allowed to be compressed out. This is useful
- # in rare cases where a table is forced to use old breakpoints
- # because of side comments,
- # and the table starts out something like this:
- # my %MonthChars = ('0', 'Jan', # side comment
- # '1', 'Feb',
- # '2', 'Mar',
- # Eliminating the '=' field will allow the remaining fields to
- # line up. This situation does not occur if there are no side
- # comments because scan_list would put a break after the
- # opening '('.
- # -------------------------------------------------------------
-
- eliminate_old_fields( $new_line, $base_line );
-
- # -------------------------------------------------------------
- # If the new line has more fields than the current group,
- # see if we can match the first fields and combine the remaining
- # fields of the new line.
- # -------------------------------------------------------------
-
- eliminate_new_fields( $new_line, $base_line );
-
- # -------------------------------------------------------------
- # Flush previous group unless all common tokens and patterns
- # match..
-
- check_match( $new_line, $base_line );
-
- # -------------------------------------------------------------
- # See if there is space for this line in the current group (if
- # any)
- # -------------------------------------------------------------
- if (@group_lines) {
- check_fit( $new_line, $base_line );
+ # See if the new line matches and fits the current group,
+ # if it still exists. Flush the current group if not.
+ my $match_code;
+ if ($group_line_count) {
+ ( $match_code, my $imax_align ) =
+ $self->check_match( $new_line, $base_line,
+ $rall_lines->[ $jline - 1 ] );
+ if ( $match_code != 2 ) { end_rgroup($imax_align) }
}
- add_to_group($new_line);
+ # Store the new line
+ add_to_rgroup($jline);
if ( defined($j_terminal_match) ) {
- # if there is only one line in the group (maybe due to failure
- # to match perfectly with previous lines), then align the ? or
- # { of this terminal line with the previous one unless that
- # would make the line too long
- if ( @group_lines == 1 ) {
- $base_line = $group_lines[0];
+ # Decide if we should fix a terminal match. We can either:
+ # 1. fix it and prevent the sweep_lr from changing it, or
+ # 2. leave it alone and let sweep_lr try to fix it.
+
+ # The current logic is to fix it if:
+ # -it has not joined to previous lines,
+ # -and either the previous subgroup has just 1 line, or
+ # -this line matched but did not fit (so sweep won't work)
+ my $fixit;
+ if ( $group_line_count == 1 ) {
+ $fixit ||= $match_code;
+ if ( !$fixit ) {
+ if ( @{$rgroups} > 1 ) {
+ my ( $jbegx, $jendx ) = @{ $rgroups->[-2] };
+ my $nlines = $jendx - $jbegx + 1;
+ $fixit ||= $nlines <= 1;
+ }
+ }
+ }
+
+ if ($fixit) {
+ $base_line = $new_line;
my $col_now = $base_line->get_column($j_terminal_match);
- my $pad = $col_matching_terminal - $col_now;
+
+ # Ignore an undefined value as a defensive step; shouldn't
+ # normally happen.
+ $col_now = 0 unless defined($col_now);
+
+ my $pad = $col_matching_terminal - $col_now;
my $padding_available =
$base_line->get_available_space_on_right();
- if ( $pad > 0 && $pad <= $padding_available ) {
+ if ( $col_now && $pad > 0 && $pad <= $padding_available ) {
$base_line->increase_field_width( $j_terminal_match,
$pad );
}
+
+ # do not let sweep_left_to_right change an isolated 'else'
+ if ( !$new_line->get_is_terminal_ternary() ) {
+ block_penultimate_match();
+ }
}
- my_flush_code();
- $is_matching_terminal_line = 0;
+ end_rgroup(-1);
}
- # Optional optimization; end the group if we know we cannot match
- # next line.
- elsif ( $new_line->{_end_group} ) {
- my_flush_code();
+ # end the group if we know we cannot match next line.
+ elsif ( $new_line->get_end_group() ) {
+ end_rgroup(-1);
}
+ } ## end loop over lines
+
+ end_rgroup(-1);
+ return ($rgroups);
+ }
+}
+
+sub two_line_pad {
+
+ my ( $line_m, $line, $imax_min ) = @_;
+
+ # Given:
+ # two isolated (list) lines
+ # imax_min = number of common alignment tokens
+ # Return:
+ # $pad_max = maximum suggested pad distnce
+ # = 0 if alignment not recommended
+ # Note that this is only for two lines which do not have alignment tokens
+ # in common with any other lines. It is intended for lists, but it might
+ # also be used for two non-list lines with a common leading '='.
+
+ # Allow alignment if the difference in the two unpadded line lengths
+ # is not more than either line length. The idea is to avoid
+ # aligning lines with very different field lengths, like these two:
+
+ # [
+ # 'VARCHAR', DBI::SQL_VARCHAR, undef, "'", "'", undef, 0, 1,
+ # 1, 0, 0, 0, undef, 0, 0
+ # ];
+ my $rfield_lengths = $line->get_rfield_lengths();
+ my $rfield_lengths_m = $line_m->get_rfield_lengths();
+
+ # Safety check - shouldn't happen
+ return 0
+ unless $imax_min < @{$rfield_lengths} && $imax_min < @{$rfield_lengths_m};
+
+ my $lensum_m = 0;
+ my $lensum = 0;
+ for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
+ $lensum_m += $rfield_lengths_m->[$i];
+ $lensum += $rfield_lengths->[$i];
+ }
+
+ my ( $lenmin, $lenmax ) =
+ $lensum >= $lensum_m ? ( $lensum_m, $lensum ) : ( $lensum, $lensum_m );
+
+ my $patterns_match;
+ if ( $line_m->get_list_type() && $line->get_list_type() ) {
+ $patterns_match = 1;
+ my $rpatterns_m = $line_m->get_rpatterns();
+ my $rpatterns = $line->get_rpatterns();
+ for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
+ my $pat = $rpatterns->[$i];
+ my $pat_m = $rpatterns_m->[$i];
+ if ( $pat ne $pat_m ) { $patterns_match = 0; last }
+ }
+ }
+
+ my $pad_max = $lenmax;
+ if ( !$patterns_match && $lenmax > 2 * $lenmin ) { $pad_max = 0 }
+
+ return $pad_max;
+}
+
+sub sweep_left_to_right {
+
+ my ( $rlines, $rgroups, $group_level ) = @_;
+
+ # So far we have divided the lines into groups having an equal number of
+ # identical alignments. Here we are going to look for common leading
+ # alignments between the different groups and align them when possible.
+ # For example, the three lines below are in three groups because each line
+ # has a different number of commas. In this routine we will sweep from
+ # left to right, aligning the leading commas as we go, but stopping if we
+ # hit the line length limit.
+
+ # my ( $num, $numi, $numj, $xyza, $ka, $xyzb, $kb, $aff, $error );
+ # my ( $i, $j, $error, $aff, $asum, $avec );
+ # my ( $km, $area, $varea );
+
+ # nothing to do if just one group
+ my $ng_max = @{$rgroups} - 1;
+ return unless ( $ng_max > 0 );
+
+ ############################################################################
+ # Step 1: Loop over groups to find all common leading alignment tokens
+ ############################################################################
+
+ my $line;
+ my $rtokens;
+ my $imax; # index of maximum non-side-comment alignment token
+ my $istop; # an optional stopping index
+ my $jbeg; # starting line index
+ my $jend; # ending line index
+
+ my $line_m;
+ my $rtokens_m;
+ my $imax_m;
+ my $istop_m;
+ my $jbeg_m;
+ my $jend_m;
+
+ my $istop_mm;
+
+ # Look at neighboring pairs of groups and form a simple list
+ # of all common leading alignment tokens. Foreach such match we
+ # store [$i, $ng], where
+ # $i = index of the token in the line (0,1,...)
+ # $ng is the second of the two groups with this common token
+ my @icommon;
+
+ # Hash to hold the maximum alignment change for any group
+ my %max_move;
+
+ # a small number of columns
+ my $short_pad = 4;
+
+ my $ng = -1;
+ foreach my $item ( @{$rgroups} ) {
+ $ng++;
+
+ $istop_mm = $istop_m;
+
+ # save _m values of previous group
+ $line_m = $line;
+ $rtokens_m = $rtokens;
+ $imax_m = $imax;
+ $istop_m = $istop;
+ $jbeg_m = $jbeg;
+ $jend_m = $jend;
+
+ # Get values for this group. Note that we just have to use values for
+ # one of the lines of the group since all members have the same
+ # alignments.
+ ( $jbeg, $jend, $istop ) = @{$item};
+
+ $line = $rlines->[$jbeg];
+ $rtokens = $line->get_rtokens();
+ $imax = $line->get_jmax() - 2;
+ $istop = -1 unless ( defined($istop) );
+ $istop = $imax if ( $istop > $imax );
+
+ # Initialize on first group
+ next if ( $ng == 0 );
+
+ # Use the minimum index limit of the two groups
+ my $imax_min = $imax > $imax_m ? $imax_m : $imax;
+
+ # Also impose a limit if given.
+ if ( $istop_m < $imax_min ) {
+ $imax_min = $istop_m;
+ }
+
+ # Special treatment of two one-line groups isolated from other lines,
+ # unless they form a simple list or a terminal match. Otherwise the
+ # alignment can look strange in some cases.
+ my $list_type = $rlines->[$jbeg]->get_list_type();
+ if (
+ $jend == $jbeg
+ && $jend_m == $jbeg_m
+ && ( $ng == 1 || $istop_mm < 0 )
+ && ( $ng == $ng_max || $istop < 0 )
+ && !$line->get_j_terminal_match()
+
+ # Only do this for imperfect matches. This is normally true except
+ # when two perfect matches cannot form a group because the line
+ # length limit would be exceeded. In that case we can still try
+ # to match as many alignments as possible.
+ && ( $imax != $imax_m || $istop_m != $imax_m )
+ )
+ {
+
+ # We will just align assignments and simple lists
+ next unless ( $imax_min >= 0 );
+ next
+ unless ( $rtokens->[0] =~ /^=\d/
+ || $list_type );
+
+ # In this case we will limit padding to a short distance. This
+ # is a compromise to keep some vertical alignment but prevent large
+ # gaps, which do not look good for just two lines.
+ my $pad_max =
+ two_line_pad( $rlines->[$jbeg], $rlines->[$jbeg_m], $imax_min );
+ next unless ($pad_max);
+ my $ng_m = $ng - 1;
+ $max_move{"$ng_m"} = $pad_max;
+ $max_move{"$ng"} = $pad_max;
}
- # LP FIX PART 2
- # if we managed to keep the group intact for -lp formatting,
- # restore the flag which allows extra indentation
- if ( $keep_group_intact && @group_lines == @new_lines ) {
- $extra_indent_ok = $extra_indent_ok_save;
+ # Loop to find all common leading tokens.
+ if ( $imax_min >= 0 ) {
+ foreach my $i ( 0 .. $imax_min ) {
+ my $tok = $rtokens->[$i];
+ my $tok_m = $rtokens_m->[$i];
+ last if ( $tok ne $tok_m );
+ push @icommon, [ $i, $ng, $tok ];
+ }
+ }
+ }
+ return unless @icommon;
+
+ ###########################################################
+ # Step 2: Reorder and consolidate the list into a task list
+ ###########################################################
+
+ # We have to work first from lowest token index to highest, then by group,
+ # sort our list first on token index then group number
+ @icommon = sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @icommon;
+
+ # Make a task list of the form
+ # [$i, ng_beg, $ng_end, $tok], ..
+ # where
+ # $i is the index of the token to be aligned
+ # $ng_beg..$ng_end is the group range for this action
+ my @todo;
+ my ( $i, $ng_end, $tok );
+ foreach my $item (@icommon) {
+ my $ng_last = $ng_end;
+ my $i_last = $i;
+ ( $i, $ng_end, $tok ) = @{$item};
+ my $ng_beg = $ng_end - 1;
+ if ( defined($ng_last) && $ng_beg == $ng_last && $i == $i_last ) {
+ my $var = pop(@todo);
+ $ng_beg = $var->[1];
}
- my_flush_code();
+ my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
+ push @todo, [ $i, $ng_beg, $ng_end, $raw_tok, $lev ];
}
+
+ ###############################
+ # Step 3: Execute the task list
+ ###############################
+ do_left_to_right_sweep( $rlines, $rgroups, \@todo, \%max_move, $short_pad,
+ $group_level );
return;
}
+{ ## closure for sub do_left_to_right_sweep
+
+ my %is_good_alignment_token;
+
+ BEGIN {
+
+ # One of the most difficult aspects of vertical alignment is knowing
+ # when not to align. Alignment can go from looking very nice to very
+ # bad when overdone. In the sweep algorithm there are two special
+ # cases where we may need to limit padding to a '$short_pad' distance
+ # to avoid some very ugly formatting:
+
+ # 1. Two isolated lines with partial alignment
+ # 2. A 'tail-wag-dog' situation, in which a single terminal
+ # line with partial alignment could cause a significant pad
+ # increase in many previous lines if allowed to join the alignment.
+
+ # For most alignment tokens, we will allow only a small pad to be
+ # introduced (the hardwired $short_pad variable) . But for some 'good'
+ # alignments we can be less restrictive.
+
+ # These are 'good' alignments, which are allowed more padding:
+ my @q = qw(
+ => = ? if unless or || {
+ );
+ push @q, ',';
+ @is_good_alignment_token{@q} = (0) x scalar(@q);
+
+ # Promote a few of these to 'best', with essentially no pad limit:
+ $is_good_alignment_token{'='} = 1;
+ $is_good_alignment_token{'if'} = 1;
+ $is_good_alignment_token{'unless'} = 1;
+ $is_good_alignment_token{'=>'} = 1
+
+ # Note the hash values are set so that:
+ # if ($is_good_alignment_token{$raw_tok}) => best
+ # if defined ($is_good_alignment_token{$raw_tok}) => good or best
+
+ }
+
+ sub do_left_to_right_sweep {
+ my ( $rlines, $rgroups, $rtodo, $rmax_move, $short_pad, $group_level )
+ = @_;
+
+ # $blocking_level[$nj is the level at a match failure between groups
+ # $ng-1 and $ng
+ my @blocking_level;
+ my $group_list_type = $rlines->[0]->get_list_type();
+
+ my $move_to_common_column = sub {
+
+ # Move the alignment column of token $itok to $col_want for a
+ # sequence of groups.
+ my ( $ngb, $nge, $itok, $col_want, $raw_tok ) = @_;
+ return unless ( defined($ngb) && $nge > $ngb );
+ foreach my $ng ( $ngb .. $nge ) {
+
+ my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
+ my $line = $rlines->[$jbeg];
+ my $col = $line->get_column($itok);
+ my $avail = $line->get_available_space_on_right();
+ my $move = $col_want - $col;
+ if ( $move > 0 ) {
+
+ # limit padding increase in isolated two lines
+ next
+ if ( defined( $rmax_move->{$ng} )
+ && $move > $rmax_move->{$ng}
+ && !$is_good_alignment_token{$raw_tok} );
+
+ $line->increase_field_width( $itok, $move );
+ }
+ elsif ( $move < 0 ) {
+
+ # spot to take special action on failure to move
+ }
+ }
+ return;
+ };
+
+ foreach my $task ( @{$rtodo} ) {
+ my ( $itok, $ng_beg, $ng_end, $raw_tok, $lev ) = @{$task};
+
+ # Nothing to do for a single group
+ next unless ( $ng_end > $ng_beg );
+
+ my $ng_first; # index of the first group of a continuous sequence
+ my $col_want; # the common alignment column of a sequence of groups
+ my $col_limit; # maximum column before bumping into max line length
+ my $line_count_ng_m = 0;
+ my $jmax_m;
+ my $it_stop_m;
+
+ # Loop over the groups
+ # 'ix_' = index in the array of lines
+ # 'ng_' = index in the array of groups
+ # 'it_' = index in the array of tokens
+ my $ix_min = $rgroups->[$ng_beg]->[0];
+ my $ix_max = $rgroups->[$ng_end]->[1];
+ my $lines_total = $ix_max - $ix_min + 1;
+ foreach my $ng ( $ng_beg .. $ng_end ) {
+ my ( $ix_beg, $ix_end, $it_stop ) = @{ $rgroups->[$ng] };
+ my $line_count_ng = $ix_end - $ix_beg + 1;
+
+ # Important: note that since all lines in a group have a common
+ # alignments object, we just have to work on one of the lines
+ # (the first line). All of the rest will be changed
+ # automatically.
+ my $line = $rlines->[$ix_beg];
+ my $jmax = $line->get_jmax();
+
+ # the maximum space without exceeding the line length:
+ my $avail = $line->get_available_space_on_right();
+ my $col = $line->get_column($itok);
+ my $col_max = $col + $avail;
+
+ # Initialize on first group
+ if ( !defined($col_want) ) {
+ $ng_first = $ng;
+ $col_want = $col;
+ $col_limit = $col_max;
+ $line_count_ng_m = $line_count_ng;
+ $jmax_m = $jmax;
+ $it_stop_m = $it_stop;
+ next;
+ }
+
+ # RULE: Throw a blocking flag upon encountering a token level
+ # different from the level of the first blocking token. For
+ # example, in the following example, if the = matches get
+ # blocked between two groups as shown, then we want to start
+ # blocking matches at the commas, which are at deeper level, so
+ # that we do not get the big gaps shown here:
+
+ # my $unknown3 = pack( "v", -2 );
+ # my $unknown4 = pack( "v", 0x09 );
+ # my $unknown5 = pack( "VVV", 0x06, 0x00, 0x00 );
+ # my $num_bbd_blocks = pack( "V", $num_lists );
+ # my $root_startblock = pack( "V", $root_start );
+ # my $unknown6 = pack( "VV", 0x00, 0x1000 );
+
+ # On the other hand, it is okay to keep matching at the same
+ # level such as in a simple list of commas and/or fat arrors.
+
+ my $is_blocked = defined( $blocking_level[$ng] )
+ && $lev > $blocking_level[$ng];
+
+ # TAIL-WAG-DOG RULE: prevent a 'tail-wag-dog' syndrom, meaning:
+ # Do not let one or two lines with a **different number of
+ # alignments** open up a big gap in a large block. For
+ # example, we will prevent something like this, where the first
+ # line prys open the rest:
+
+ # $worksheet->write( "B7", "http://www.perl.com", undef, $format );
+ # $worksheet->write( "C7", "", $format );
+ # $worksheet->write( "D7", "", $format );
+ # $worksheet->write( "D8", "", $format );
+ # $worksheet->write( "D8", "", $format );
+
+ # We should exclude from consideration two groups which are
+ # effectively the same but separated because one does not
+ # fit in the maximum allowed line length.
+ my $is_same_group =
+ $jmax == $jmax_m && $it_stop_m == $jmax_m - 2;
+
+ my $lines_above = $ix_beg - $ix_min;
+ my $lines_below = $lines_total - $lines_above;
+
+ # Increase the tolerable gap for certain favorable factors
+ my $factor = 1;
+ my $top_level = $lev == $group_level;
+
+ # Align best top level alignment tokens like '=', 'if', ...
+ # A factor of 10 allows a gap of up to 40 spaces
+ if ( $top_level && $is_good_alignment_token{$raw_tok} ) {
+ $factor = 10;
+ }
+
+ # Otherwise allow some minimal padding of good alignments
+ elsif (
+
+ defined( $is_good_alignment_token{$raw_tok} )
+
+ # We have to be careful if there are just 2 lines. This
+ # two-line factor allows large gaps only for 2 lines which
+ # are simple lists with fewer items on the second line. It
+ # gives results similar to previous versions of perltidy.
+ && ( $lines_total > 2
+ || $group_list_type && $jmax < $jmax_m && $top_level )
+ )
+ {
+ $factor += 1;
+ if ($top_level) {
+ $factor += 1;
+ }
+ }
+
+ my $is_big_gap;
+ if ( !$is_same_group ) {
+ $is_big_gap ||=
+ ( $lines_above == 1
+ || $lines_above == 2 && $lines_below >= 4 )
+ && $col_want > $col + $short_pad * $factor;
+ $is_big_gap ||=
+ ( $lines_below == 1
+ || $lines_below == 2 && $lines_above >= 4 )
+ && $col > $col_want + $short_pad * $factor;
+ }
+
+ # if match is limited by gap size, stop aligning at this level
+ if ($is_big_gap) {
+ $blocking_level[$ng] = $lev - 1;
+ }
+
+ # quit and restart if it cannot join this batch
+ if ( $col_want > $col_max
+ || $col > $col_limit
+ || $is_big_gap
+ || $is_blocked )
+ {
+
+ # remember the level of the first blocking token
+ if ( !defined( $blocking_level[$ng] ) ) {
+ $blocking_level[$ng] = $lev;
+ }
+
+ $move_to_common_column->(
+ $ng_first, $ng - 1, $itok, $col_want, $raw_tok
+ );
+ $ng_first = $ng;
+ $col_want = $col;
+ $col_limit = $col_max;
+ $line_count_ng_m = $line_count_ng;
+ $jmax_m = $jmax;
+ $it_stop_m = $it_stop;
+ next;
+ }
+
+ $line_count_ng_m += $line_count_ng;
+
+ # update the common column and limit
+ if ( $col > $col_want ) { $col_want = $col }
+ if ( $col_max < $col_limit ) { $col_limit = $col_max }
+
+ } ## end loop over groups
+
+ if ( $ng_end > $ng_first ) {
+ $move_to_common_column->(
+ $ng_first, $ng_end, $itok, $col_want, $raw_tok
+ );
+ } ## end loop over groups for one task
+ } ## end loop over tasks
+
+ return;
+ }
+}
+
sub delete_selected_tokens {
my ( $line_obj, $ridel ) = @_;
+ # $line_obj is the line to be modified
+ # $ridel is a ref to list of indexes to be deleted
+
# remove an unused alignment token(s) to improve alignment chances
+
return unless ( defined($line_obj) && defined($ridel) && @{$ridel} );
- my $jmax_old = $line_obj->get_jmax();
- my $rfields_old = $line_obj->get_rfields();
- my $rpatterns_old = $line_obj->get_rpatterns();
- my $rtokens_old = $line_obj->get_rtokens();
+ my $jmax_old = $line_obj->get_jmax();
+ my $rfields_old = $line_obj->get_rfields();
+ my $rfield_lengths_old = $line_obj->get_rfield_lengths();
+ my $rpatterns_old = $line_obj->get_rpatterns();
+ my $rtokens_old = $line_obj->get_rtokens();
+ my $j_terminal_match = $line_obj->get_j_terminal_match();
+
+ use constant EXPLAIN_DELETE_SELECTED => 0;
local $" = '> <';
- 0 && print <<EOM;
+ EXPLAIN_DELETE_SELECTED && print <<EOM;
delete indexes: <@{$ridel}>
old jmax: $jmax_old
old tokens: <@{$rtokens_old}>
old patterns: <@{$rpatterns_old}>
old fields: <@{$rfields_old}>
+old field_lengths: <@{$rfield_lengths_old}>
EOM
- my $rfields_new = [];
- my $rpatterns_new = [];
- my $rtokens_new = [];
-
- my $kmax = @{$ridel} - 1;
- my $k = 0;
- my $jdel_next = $ridel->[$k];
-
- # FIXME:
- if ( $jdel_next < 0 ) { print STDERR "bad jdel_next=$jdel_next\n"; return }
- my $pattern = $rpatterns_old->[0];
- my $field = $rfields_old->[0];
- push @{$rfields_new}, $field;
- push @{$rpatterns_new}, $pattern;
+ my $rfields_new = [];
+ my $rpatterns_new = [];
+ my $rtokens_new = [];
+ my $rfield_lengths_new = [];
+
+ # Convert deletion list to a hash to allow any order, multiple entries,
+ # and avoid problems with index values out of range
+ my %delete_me;
+ @delete_me{ @{$ridel} } = (1) x scalar( @{$ridel} );
+
+ my $pattern = $rpatterns_old->[0];
+ my $field = $rfields_old->[0];
+ my $field_length = $rfield_lengths_old->[0];
+ push @{$rfields_new}, $field;
+ push @{$rfield_lengths_new}, $field_length;
+ push @{$rpatterns_new}, $pattern;
+
+ # Loop to either copy items or concatenate fields and patterns
+ my $jmin_del;
for ( my $j = 0 ; $j < $jmax_old ; $j++ ) {
- my $token = $rtokens_old->[$j];
- my $field = $rfields_old->[ $j + 1 ];
- my $pattern = $rpatterns_old->[ $j + 1 ];
- if ( $k > $kmax || $j < $jdel_next ) {
- push @{$rtokens_new}, $token;
- push @{$rfields_new}, $field;
- push @{$rpatterns_new}, $pattern;
- }
- elsif ( $j == $jdel_next ) {
- $rfields_new->[-1] .= $field;
+ my $token = $rtokens_old->[$j];
+ my $field = $rfields_old->[ $j + 1 ];
+ my $field_length = $rfield_lengths_old->[ $j + 1 ];
+ my $pattern = $rpatterns_old->[ $j + 1 ];
+ if ( !$delete_me{$j} ) {
+ push @{$rtokens_new}, $token;
+ push @{$rfields_new}, $field;
+ push @{$rpatterns_new}, $pattern;
+ push @{$rfield_lengths_new}, $field_length;
+ }
+ else {
+ if ( !defined($jmin_del) ) { $jmin_del = $j }
+ $rfields_new->[-1] .= $field;
+ $rfield_lengths_new->[-1] += $field_length;
$rpatterns_new->[-1] .= $pattern;
- if ( ++$k <= $kmax ) {
- my $jdel_last = $jdel_next;
- $jdel_next = $ridel->[$k];
- if ( $jdel_next < $jdel_last ) {
-
- # FIXME:
- print STDERR "bad jdel_next=$jdel_next\n";
- return;
- }
- }
}
}
$line_obj->set_rtokens($rtokens_new);
$line_obj->set_rpatterns($rpatterns_new);
$line_obj->set_rfields($rfields_new);
+ $line_obj->set_rfield_lengths($rfield_lengths_new);
$line_obj->set_jmax($jmax_new);
- 0 && print <<EOM;
+ # The value of j_terminal_match will be incorrect if we delete tokens prior
+ # to it. We will have to give up on aligning the terminal tokens if this
+ # happens.
+ if ( defined($j_terminal_match) && $jmin_del <= $j_terminal_match ) {
+ $line_obj->set_j_terminal_match(undef);
+ }
+
+ # update list type -
+ if ( $line_obj->get_list_seqno() ) {
+
+ ## This works, but for efficiency see if we need to make a change:
+ ## decide_if_list($line_obj);
+
+ # An existing list will still be a list but with possibly different
+ # leading token
+ my $old_list_type = $line_obj->get_list_type();
+ my $new_list_type = "";
+ if ( $rtokens_new->[0] =~ /^(=>|,)/ ) {
+ $new_list_type = $rtokens_new->[0];
+ }
+ if ( !$old_list_type || $old_list_type ne $new_list_type ) {
+ decide_if_list($line_obj);
+ }
+ }
+
+ EXPLAIN_DELETE_SELECTED && print <<EOM;
new jmax: $jmax_new
new tokens: <@{$rtokens_new}>
return;
}
-sub decode_alignment_token {
+{ ## closure for sub decode_alignment_token
- # Unpack the values packed in an alignment token
- #
- # Usage:
- # my ( $raw_tok, $lev, $tag, $tok_count ) =
- # decode_alignment_token($token);
-
- # Alignment tokens have a trailing decimal level and optional tag (for
- # commas):
- # For example, the first comma in the following line
- # sub banner { crlf; report( shift, '/', shift ); crlf }
- # is decorated as follows:
- # ,2+report-6 => (tok,lev,tag) =qw( , 2 +report-6)
-
- # An optional token count may be appended with a leading dot.
- # Currently this is only done for '=' tokens but this could change.
- # For example, consider the following line:
- # $nport = $port = shift || $name;
- # The first '=' may either be '=0' or '=0.1' [level 0, first equals]
- # The second '=' will be '=0.2' [level 0, second equals]
- my ($tok) = @_;
- my ( $raw_tok, $lev, $tag, $tok_count ) = ( $tok, 0, "", 1 );
- if ( $tok =~ /^(\D+)(\d+)([^\.]*)(\.(\d+))?$/ ) {
- $raw_tok = $1;
- $lev = $2;
- $tag = $3 if ($3);
- $tok_count = $5 if ($5);
- }
- return ( $raw_tok, $lev, $tag, $tok_count );
+ # This routine is called repeatedly for each token, so it needs to be
+ # efficient. We can speed things up by remembering the inputs and outputs
+ # in a hash.
+ my %decoded_token;
+
+ sub initialize_decode {
+
+ # We will re-initialize the hash for each file. Otherwise, there is
+ # a danger that the hash can become arbitrarily large if a very large
+ # number of files is processed at once.
+ %decoded_token = ();
+ return;
+ }
+
+ sub decode_alignment_token {
+
+ # Unpack the values packed in an alignment token
+ #
+ # Usage:
+ # my ( $raw_tok, $lev, $tag, $tok_count ) =
+ # decode_alignment_token($token);
+
+ # Alignment tokens have a trailing decimal level and optional tag (for
+ # commas):
+ # For example, the first comma in the following line
+ # sub banner { crlf; report( shift, '/', shift ); crlf }
+ # is decorated as follows:
+ # ,2+report-6 => (tok,lev,tag) =qw( , 2 +report-6)
+
+ # An optional token count may be appended with a leading dot.
+ # Currently this is only done for '=' tokens but this could change.
+ # For example, consider the following line:
+ # $nport = $port = shift || $name;
+ # The first '=' may either be '=0' or '=0.1' [level 0, first equals]
+ # The second '=' will be '=0.2' [level 0, second equals]
+ my ($tok) = @_;
+
+ if ( defined( $decoded_token{$tok} ) ) {
+ return @{ $decoded_token{$tok} };
+ }
+
+ my ( $raw_tok, $lev, $tag, $tok_count ) = ( $tok, 0, "", 1 );
+ if ( $tok =~ /^(\D+)(\d+)([^\.]*)(\.(\d+))?$/ ) {
+ $raw_tok = $1;
+ $lev = $2;
+ $tag = $3 if ($3);
+ $tok_count = $5 if ($5);
+ }
+ my @vals = ( $raw_tok, $lev, $tag, $tok_count );
+ $decoded_token{$tok} = \@vals;
+ return @vals;
+ }
}
-{ # sub is_deletable_token
+{ ## closure for sub delete_unmatched_tokens
- my %is_deletable_equals;
+ my %is_assignment;
+ my %keep_after_deleted_assignment;
BEGIN {
my @q;
- # These tokens with = may be deleted for vertical aligmnemt
@q = qw(
- <= >= == =~ != <=>
+ = **= += *= &= <<= &&=
+ -= /= |= >>= ||= //=
+ .= %= ^=
+ x=
);
- @is_deletable_equals{@q} = (1) x scalar(@q);
+ @is_assignment{@q} = (1) x scalar(@q);
+
+ # These tokens may be kept following an = deletion
+ @q = qw(
+ if unless or ||
+ );
+ @keep_after_deleted_assignment{@q} = (1) x scalar(@q);
}
- sub is_deletable_token {
+ # This flag is for testing only and should normally be zero.
+ use constant TEST_DELETE_NULL => 0;
- # Determine if a token with no match possibility can be removed to
- # improve chances of making an alignment.
- my ( $token, $i, $imax, $jline, $i_eq ) = @_;
+ sub delete_unmatched_tokens {
+ my ( $rlines, $group_level ) = @_;
- my ( $raw_tok, $lev, $tag, $tok_count ) =
- decode_alignment_token($token);
+ # This is a preliminary step in vertical alignment in which we remove
+ # as many obviously un-needed alignment tokens as possible. This will
+ # prevent them from interfering with the final alignment.
+
+ # These are the return values
+ my $max_lev_diff = 0; # used to avoid a call to prune_tree
+ my $saw_side_comment = 0; # used to avoid a call for side comments
+
+ # Handle no lines -- shouldn't happen
+ return unless @{$rlines};
+
+ # Handle a single line
+ if ( @{$rlines} == 1 ) {
+ my $line = $rlines->[0];
+ my $jmax = $line->get_jmax();
+ my $length = $line->get_rfield_lengths()->[$jmax];
+ $saw_side_comment = $length > 0;
+ return ( $max_lev_diff, $saw_side_comment );
+ }
+
+ my $has_terminal_match = $rlines->[-1]->get_j_terminal_match();
+
+ # ignore hanging side comments in these operations
+ my @filtered = grep { !$_->get_is_hanging_side_comment() } @{$rlines};
+ my $rnew_lines = \@filtered;
+
+ $saw_side_comment = @filtered != @{$rlines};
+ $max_lev_diff = 0;
+
+ # nothing to do if all lines were hanging side comments
+ my $jmax = @{$rnew_lines} - 1;
+ return ( $max_lev_diff, $saw_side_comment ) unless ( $jmax >= 0 );
+
+ my @equals_info;
+ my @line_info;
+ my %is_good_tok;
+
+ # create a hash of tokens for each line
+ my $rline_hashes = [];
+ foreach my $line ( @{$rnew_lines} ) {
+ my $rhash = {};
+ my $rtokens = $line->get_rtokens();
+ my $rpatterns = $line->get_rpatterns();
+ my $i = 0;
+ my ( $i_eq, $tok_eq, $pat_eq );
+ my ( $lev_min, $lev_max );
+ foreach my $tok ( @{$rtokens} ) {
+ my ( $raw_tok, $lev, $tag, $tok_count ) =
+ decode_alignment_token($tok);
+
+ if ( $tok ne '#' ) {
+ if ( !defined($lev_min) ) {
+ $lev_min = $lev;
+ $lev_max = $lev;
+ }
+ else {
+ if ( $lev < $lev_min ) { $lev_min = $lev }
+ if ( $lev > $lev_max ) { $lev_max = $lev }
+ }
+ }
+ else {
+ if ( !$saw_side_comment ) {
+ my $length = $line->get_rfield_lengths()->[ $i + 1 ];
+ $saw_side_comment ||= $length;
+ }
+ }
- # okay to delete second and higher copies of a token
- if ( $tok_count > 1 ) { return 1 }
+ # Possible future upgrade: for multiple matches,
+ # record [$i1, $i2, ..] instead of $i
+ $rhash->{$tok} =
+ [ $i, undef, undef, $raw_tok, $lev, $tag, $tok_count ];
- # only remove lower level commas
- if ( $raw_tok eq ',' ) {
+ # remember the first equals at line level
+ if ( !defined($i_eq) && $raw_tok eq '=' ) {
- return if ( defined($i_eq) && $i < $i_eq );
- return if ( $lev <= $group_level );
+ if ( $lev eq $group_level ) {
+ $i_eq = $i;
+ $tok_eq = $tok;
+ $pat_eq = $rpatterns->[$i];
+ }
+ }
+ $i++;
+ }
+ push @{$rline_hashes}, $rhash;
+ push @equals_info, [ $i_eq, $tok_eq, $pat_eq ];
+ push @line_info, [ $lev_min, $lev_max ];
+ if ( defined($lev_min) ) {
+ my $lev_diff = $lev_max - $lev_min;
+ if ( $lev_diff > $max_lev_diff ) { $max_lev_diff = $lev_diff }
+ }
}
- # most operators with an equals sign should be retained if at
- # same level as this statement
- elsif ( $raw_tok =~ /=/ ) {
- return
- unless ( $lev > $group_level || $is_deletable_equals{$raw_tok} );
+ # compare each line pair and record matches
+ my $rtok_hash = {};
+ my $nr = 0;
+ for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) {
+ my $nl = $nr;
+ $nr = 0;
+ my $jr = $jl + 1;
+ my $rhash_l = $rline_hashes->[$jl];
+ my $rhash_r = $rline_hashes->[$jr];
+ my $count = 0; # UNUSED NOW?
+ my $ntoks = 0;
+ foreach my $tok ( keys %{$rhash_l} ) {
+ $ntoks++;
+ if ( defined( $rhash_r->{$tok} ) ) {
+ if ( $tok ne '#' ) { $count++; }
+ my $il = $rhash_l->{$tok}->[0];
+ my $ir = $rhash_r->{$tok}->[0];
+ $rhash_l->{$tok}->[2] = $ir;
+ $rhash_r->{$tok}->[1] = $il;
+ if ( $tok ne '#' ) {
+ push @{ $rtok_hash->{$tok} }, ( $jl, $jr );
+ $nr++;
+ }
+ }
+ }
+
+ # Set a line break if no matching tokens between these lines
+ # (this is not strictly necessary now but does not hurt)
+ if ( $nr == 0 && $nl > 0 ) {
+ $rnew_lines->[$jl]->set_end_group(1);
+ }
+
+ # Also set a line break if both lines have simple equals but with
+ # different leading characters in patterns. This check is similar
+ # to one in sub check_match, and will prevent sub
+ # prune_alignment_tree from removing alignments which otherwise
+ # should be kept. This fix is rarely needed, but it can
+ # occasionally improve formatting.
+ # For example:
+ # my $name = $this->{Name};
+ # $type = $this->ctype($genlooptype) if defined $genlooptype;
+ # my $declini = ( $asgnonly ? "" : "\t$type *" );
+ # my $cast = ( $type ? "($type *)" : "" );
+ # The last two lines start with 'my' and will not match the
+ # previous line starting with $type, so we do not want
+ # prune_alignment tree to delete their ? : alignments at a deeper
+ # level.
+ my ( $i_eq_l, $tok_eq_l, $pat_eq_l ) = @{ $equals_info[$jl] };
+ my ( $i_eq_r, $tok_eq_r, $pat_eq_r ) = @{ $equals_info[$jr] };
+ if ( defined($i_eq_l) && defined($i_eq_r) ) {
+
+ # Also, do not align equals across a change in ci level
+ my $ci_jump = $rnew_lines->[$jl]->get_ci_level() !=
+ $rnew_lines->[$jr]->get_ci_level();
+
+ if (
+ $tok_eq_l eq $tok_eq_r
+ && $i_eq_l == 0
+ && $i_eq_r == 0
+ && ( substr( $pat_eq_l, 0, 1 ) ne substr( $pat_eq_r, 0, 1 )
+ || $ci_jump )
+ )
+ {
+ $rnew_lines->[$jl]->set_end_group(1);
+ }
+ }
+ }
+
+ # find subgroups
+ my @subgroups;
+ push @subgroups, [ 0, $jmax ];
+ for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) {
+ if ( $rnew_lines->[$jl]->get_end_group() ) {
+ $subgroups[-1]->[1] = $jl;
+ push @subgroups, [ $jl + 1, $jmax ];
+ }
+ }
+
+ # flag to allow skipping pass 2
+ my $saw_large_group;
+
+ ############################################################
+ # PASS 1 over subgroups to remove unmatched alignment tokens
+ ############################################################
+ foreach my $item (@subgroups) {
+ my ( $jbeg, $jend ) = @{$item};
+
+ my $nlines = $jend - $jbeg + 1;
+
+ ####################################################
+ # Look for complete if/elsif/else and ternary blocks
+ ####################################################
+
+ # We are looking for a common '$dividing_token' like these:
+
+ # if ( $b and $s ) { $p->{'type'} = 'a'; }
+ # elsif ($b) { $p->{'type'} = 'b'; }
+ # elsif ($s) { $p->{'type'} = 's'; }
+ # else { $p->{'type'} = ''; }
+ # ^----------- dividing_token
+
+ # my $severity =
+ # !$routine ? '[PFX]'
+ # : $routine =~ /warn.*_d\z/ ? '[DS]'
+ # : $routine =~ /ck_warn/ ? 'W'
+ # : $routine =~ /ckWARN\d*reg_d/ ? 'S'
+ # : $routine =~ /ckWARN\d*reg/ ? 'W'
+ # : $routine =~ /vWARN\d/ ? '[WDS]'
+ # : '[PFX]';
+ # ^----------- dividing_token
+
+ # Only look for groups which are more than 2 lines long. Two lines
+ # can get messed up doing this, probably due to the various
+ # two-line rules.
+
+ my $dividing_token;
+ my %token_line_count;
+ if ( $nlines > 2 ) {
+
+ for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
+ my %seen;
+ my $line = $rnew_lines->[$jj];
+ my $rtokens = $line->get_rtokens();
+ foreach my $tok ( @{$rtokens} ) {
+ if ( !$seen{$tok} ) {
+ $seen{$tok}++;
+ $token_line_count{$tok}++;
+ }
+ }
+ }
+
+ foreach my $tok ( keys %token_line_count ) {
+ if ( $token_line_count{$tok} == $nlines ) {
+ if ( substr( $tok, 0, 1 ) eq '?'
+ || substr( $tok, 0, 1 ) eq '{'
+ && $tok =~ /^\{\d+if/ )
+ {
+ $dividing_token = $tok;
+ last;
+ }
+ }
+ }
+ }
+
+ #####################################################
+ # Loop over lines to remove unwanted alignment tokens
+ #####################################################
+ for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
+ my $line = $rnew_lines->[$jj];
+ my $rtokens = $line->get_rtokens();
+ my $rhash = $rline_hashes->[$jj];
+ my $i_eq = $equals_info[$jj]->[0];
+ my @idel;
+ my $imax = @{$rtokens} - 2;
+ my $delete_above_level;
+ my $deleted_assignment_token;
+
+ my $saw_dividing_token = "";
+ $saw_large_group ||= $nlines > 2 && $imax > 1;
+
+ # Loop over all alignment tokens
+ for ( my $i = 0 ; $i <= $imax ; $i++ ) {
+ my $tok = $rtokens->[$i];
+ next if ( $tok eq '#' ); # shouldn't happen
+ my ( $iii, $il, $ir, $raw_tok, $lev, $tag, $tok_count ) =
+ @{ $rhash->{$tok} };
+
+ #######################################################
+ # Here is the basic RULE: remove an unmatched alignment
+ # which does not occur in the surrounding lines.
+ #######################################################
+ my $delete_me = !defined($il) && !defined($ir);
+
+ # Apply any user controls. Note that not all lines pass
+ # this way so they have to be applied elsewhere too.
+ my $align_ok = 1;
+ if (%valign_control_hash) {
+ $align_ok = $valign_control_hash{$raw_tok};
+ $align_ok = $valign_control_default
+ unless defined($align_ok);
+ $delete_me ||= !$align_ok;
+ }
+
+ # But now we modify this with exceptions...
+
+ # EXCEPTION 1: If we are in a complete ternary or
+ # if/elsif/else group, and this token is not on every line
+ # of the group, should we delete it to preserve overall
+ # alignment?
+ if ($dividing_token) {
+ if ( $token_line_count{$tok} >= $nlines ) {
+ $saw_dividing_token ||= $tok eq $dividing_token;
+ }
+ else {
+
+ # For shorter runs, delete toks to save alignment.
+ # For longer runs, keep toks after the '{' or '?'
+ # to allow sub-alignments within braces. The
+ # number 5 lines is arbitrary but seems to work ok.
+ $delete_me ||=
+ ( $nlines < 5 || !$saw_dividing_token );
+ }
+ }
+
+ # EXCEPTION 2: Remove all tokens above a certain level
+ # following a previous deletion. For example, we have to
+ # remove tagged higher level alignment tokens following a
+ # '=>' deletion because the tags of higher level tokens
+ # will now be incorrect. For example, this will prevent
+ # aligning commas as follows after deleting the second '=>'
+ # $w->insert(
+ # ListBox => origin => [ 270, 160 ],
+ # size => [ 200, 55 ],
+ # );
+ if ( defined($delete_above_level) ) {
+ if ( $lev > $delete_above_level ) {
+ $delete_me ||= 1; #$tag;
+ }
+ else { $delete_above_level = undef }
+ }
+
+ # EXCEPTION 3: Remove all but certain tokens after an
+ # assignment deletion.
+ if (
+ $deleted_assignment_token
+ && ( $lev > $group_level
+ || !$keep_after_deleted_assignment{$raw_tok} )
+ )
+ {
+ $delete_me ||= 1;
+ }
+
+ # EXCEPTION 4: Do not touch the first line of a 2 line
+ # terminal match, such as below, because j_terminal has
+ # already been set.
+ # if ($tag) { $tago = "<$tag>"; $tagc = "</$tag>"; }
+ # else { $tago = $tagc = ''; }
+ # But see snippets 'else1.t' and 'else2.t'
+ $delete_me = 0
+ if ( $jj == $jbeg
+ && $has_terminal_match
+ && $nlines == 2 );
+
+ # EXCEPTION 5: misc additional rules for commas and equals
+ if ($delete_me) {
+
+ # okay to delete second and higher copies of a token
+ if ( $tok_count == 1 ) {
+
+ # for a comma...
+ if ( $raw_tok eq ',' ) {
+
+ # Do not delete commas before an equals
+ $delete_me = 0
+ if ( defined($i_eq) && $i < $i_eq );
+
+ # Do not delete line-level commas
+ $delete_me = 0 if ( $lev <= $group_level );
+ }
+
+ # For an assignment at group level..
+ if ( $is_assignment{$raw_tok}
+ && $lev == $group_level )
+ {
+
+ # Do not delete if it is the last alignment of
+ # multiple tokens; this will prevent some
+ # undesirable alignments
+ if ( $imax > 0 && $i == $imax ) {
+ $delete_me = 0;
+ }
+
+ # Otherwise, set a flag to delete most
+ # remaining tokens
+ else { $deleted_assignment_token = $raw_tok }
+ }
+ }
+ }
+
+ # Do not let a user exclusion be reactivated by above rules
+ $delete_me ||= !$align_ok;
+
+ #####################################
+ # Add this token to the deletion list
+ #####################################
+ if ($delete_me) {
+ push @idel, $i;
+
+ # update deletion propagation flags
+ if ( !defined($delete_above_level)
+ || $lev < $delete_above_level )
+ {
+
+ # delete all following higher level alignments
+ $delete_above_level = $lev;
+
+ # but keep deleting after => to next lower level
+ # to avoid some bizarre alignments
+ if ( $raw_tok eq '=>' ) {
+ $delete_above_level = $lev - 1;
+ }
+ }
+ }
+ } # End loop over alignment tokens
+
+ # Process all deletion requests for this line
+ if (@idel) {
+ delete_selected_tokens( $line, \@idel );
+ }
+ } # End loopover lines
+ } # End loop over subgroups
+
+ #################################################
+ # PASS 2 over subgroups to remove null alignments
+ #################################################
+
+ # This pass is only used for testing. It is helping to identify
+ # alignment situations which might be improved with a future more
+ # general algorithm which adds a tail matching capability.
+ if (TEST_DELETE_NULL) {
+ delete_null_alignments( $rnew_lines, $rline_hashes, \@subgroups )
+ if ($saw_large_group);
}
- # otherwise, ok to delete the token
- return 1;
+ # PASS 3: Construct a tree of matched lines and delete some small deeper
+ # levels of tokens. They also block good alignments.
+ prune_alignment_tree($rnew_lines) if ($max_lev_diff);
+
+ # PASS 4: compare all lines for common tokens
+ match_line_pairs( $rlines, $rnew_lines, \@subgroups, $group_level );
+
+ return ( $max_lev_diff, $saw_side_comment );
}
}
-sub delete_unmatched_tokens {
- my ($rlines) = @_;
+sub delete_null_alignments {
+ my ( $rnew_lines, $rline_hashes, $rsubgroups ) = @_;
+
+ # This is an optional second pass for deleting alignment tokens which can
+ # occasionally improve alignment. We look for and remove 'null
+ # alignments', which are alignments that require no padding. So we can
+ # 'cheat' and delete them. For example, notice the '=~' alignment in the
+ # first two lines of the following code:
+
+ # $sysname .= 'del' if $self->label =~ /deletion/;
+ # $sysname .= 'ins' if $self->label =~ /insertion/;
+ # $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq;
+
+ # These '=~' tokens are already aligned because they are both the same
+ # distance from the previous alignment token, the 'if'. So we can
+ # eliminate them as alignments. The advantage is that in some cases, such
+ # as this one, this will allow other tokens to be aligned. In this case we
+ # then get the 'if' tokens to align:
+
+ # $sysname .= 'del' if $self->label =~ /deletion/;
+ # $sysname .= 'ins' if $self->label =~ /insertion/;
+ # $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq;
+
+ # The following rules for limiting this operation have been found to
+ # work well and avoid problems:
+
+ # Rule 1. We only consider a sequence of lines which have the same
+ # sequence of alignment tokens.
+
+ # Rule 2. We never eliminate the first alignment token. One reason is that
+ # lines may have different leading indentation spaces, so keeping the
+ # first alignment token insures that our length measurements start at
+ # a well-defined point. Another reason is that nothing is gained because
+ # the left-to-right sweep can always handle alignment of this token.
+
+ # Rule 3. We require that the first alignment token exist in either
+ # a previous line or a subsequent line. The reason is that this avoids
+ # changing two-line matches which go through special logic.
+
+ # Rule 4. Do not delete a token which occurs in a previous or subsequent
+ # line. For example, in the above example, it was ok to eliminate the '=~'
+ # token from two lines because it did not occur in a surrounding line.
+ # If it did occur in a surrounding line, the result could be confusing
+ # or even incorrectly aligned.
+
+ # A consequence of these rules is that we only need to consider subgroups
+ # with at least 3 lines and 2 alignment tokens.
+
+ # The subgroup line index range
+ my ( $jbeg, $jend );
+
+ # Vars to keep track of the start of a current sequence of matching
+ # lines.
+ my $rtokens_match;
+ my $rfield_lengths_match;
+ my $j_match_beg;
+ my $j_match_end;
+ my $imax_match;
+ my $rneed_pad;
+
+ # Vars for a line being tested
+ my $rtokens;
+ my $rfield_lengths;
+ my $imax;
+
+ my $start_match = sub {
+ my ($jj) = @_;
+ $rtokens_match = $rtokens;
+ $rfield_lengths_match = $rfield_lengths;
+ $j_match_beg = $jj;
+ $j_match_end = $jj;
+ $imax_match = $imax;
+ $rneed_pad = [];
+ return;
+ };
+
+ my $add_to_match = sub {
+ my ($jj) = @_;
+ $j_match_end = $jj;
+
+ # Keep track of any padding that would be needed for each token
+ for ( my $i = 0 ; $i <= $imax ; $i++ ) {
+ next if ( $rneed_pad->[$i] );
+ my $length = $rfield_lengths->[$i];
+ my $length_match = $rfield_lengths_match->[$i];
+ if ( $length ne $length_match ) { $rneed_pad->[$i] = 1 }
+ }
+ return;
+ };
+
+ my $end_match = sub {
+ return unless ( $j_match_end > $j_match_beg );
+ my $nlines = $j_match_end - $j_match_beg + 1;
+ my $rhash_beg = $rline_hashes->[$j_match_beg];
+ my $rhash_end = $rline_hashes->[$j_match_end];
+ my @idel;
+
+ # Do not delete unless the first token also occurs in a surrounding line
+ my $tok0 = $rtokens_match->[0];
+ return
+ unless (
+ (
+ $j_match_beg > $jbeg
+ && $rnew_lines->[ $j_match_beg - 1 ]->get_rtokens()->[0] eq
+ $tok0
+ )
+ || ( $j_match_end < $jend
+ && $rnew_lines->[ $j_match_end + 1 ]->get_rtokens()->[0] eq
+ $tok0 )
+ );
+
+ # Note that we are skipping the token at i=0
+ for ( my $i = 1 ; $i <= $imax_match ; $i++ ) {
+
+ # do not delete a token which requires padding to align
+ next if ( $rneed_pad->[$i] );
+
+ my $tok = $rtokens_match->[$i];
+
+ # Do not delete a token which occurs in a surrounding line
+ next
+ if ( $j_match_beg > $jbeg
+ && defined( $rline_hashes->[ $j_match_beg - 1 ]->{$tok} ) );
+ next
+ if ( $j_match_end < $jend
+ && defined( $rline_hashes->[ $j_match_end + 1 ]->{$tok} ) );
+
+ # ok to delete
+ push @idel, $i;
+ ##print "ok to delete tok=$tok\n";
+ }
+ if (@idel) {
+ foreach my $j ( $j_match_beg .. $j_match_end ) {
+ delete_selected_tokens( $rnew_lines->[$j], \@idel );
+ }
+ }
+ return;
+ };
+
+ foreach my $item ( @{$rsubgroups} ) {
+ ( $jbeg, $jend ) = @{$item};
+ my $nlines = $jend - $jbeg + 1;
+ next unless ( $nlines > 2 );
- # This is a preliminary step in vertical alignment in which we remove as
- # many obviously un-needed alignment tokens as possible. This will prevent
- # them from interfering with the final alignment.
+ for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
+ my $line = $rnew_lines->[$jj];
+ $rtokens = $line->get_rtokens();
+ $rfield_lengths = $line->get_rfield_lengths();
+ $imax = @{$rtokens} - 2;
+
+ # start a new match group
+ if ( $jj == $jbeg ) {
+ $start_match->($jj);
+ next;
+ }
+
+ # see if all tokens of this line match the current group
+ my $match;
+ if ( $imax == $imax_match ) {
+ for ( my $i = 0 ; $i <= $imax ; $i++ ) {
+ my $tok = $rtokens->[$i];
+ my $tok_match = $rtokens_match->[$i];
+ last if ( $tok ne $tok_match );
+ }
+ $match = 1;
+ }
+
+ # yes, they all match
+ if ($match) {
+ $add_to_match->($jj);
+ }
+
+ # now, this line does not match
+ else {
+ $end_match->();
+ $start_match->($jj);
+ }
+ } # End loopover lines
+ $end_match->();
+ } # End loop over subgroups
+ return;
+} ## end sub delete_null_alignments
+
+sub match_line_pairs {
+ my ( $rlines, $rnew_lines, $rsubgroups, $group_level ) = @_;
+
+ # Compare each pair of lines and save information about common matches
+ # $rlines = list of lines including hanging side comments
+ # $rnew_lines = list of lines without any hanging side comments
+ # $rsubgroups = list of subgroups of the new lines
+
+ # TODO:
+ # Maybe change: imax_pair => pair_match_info = ref to array
+ # = [$imax_align, $rMsg, ... ]
+ # This may eventually have multi-level match info
+
+ # Previous line vars
+ my ( $line_m, $rtokens_m, $rpatterns_m, $rfield_lengths_m, $imax_m,
+ $list_type_m, $ci_level_m );
+
+ # Current line vars
+ my ( $line, $rtokens, $rpatterns, $rfield_lengths, $imax, $list_type,
+ $ci_level );
+
+ use constant EXPLAIN_COMPARE_PATTERNS => 0;
+
+ my $compare_patterns = sub {
+
+ # helper routine to decide if patterns match well enough..
+ # return code:
+ # 0 = patterns match, continue
+ # 1 = no match
+ # 2 = no match, and lines do not match at all
+
+ my ( $tok, $tok_m, $pat, $pat_m, $pad ) = @_;
+ my $GoToMsg = "";
+ my $return_code = 1;
+
+ my ( $alignment_token, $lev, $tag, $tok_count ) =
+ decode_alignment_token($tok);
+
+ # We have to be very careful about aligning commas
+ # when the pattern's don't match, because it can be
+ # worse to create an alignment where none is needed
+ # than to omit one. Here's an example where the ','s
+ # are not in named containers. The first line below
+ # should not match the next two:
+ # ( $a, $b ) = ( $b, $r );
+ # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
+ # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
+ if ( $alignment_token eq ',' ) {
+
+ # do not align commas unless they are in named
+ # containers
+ $GoToMsg = "do not align commas in unnamed containers";
+ goto NO_MATCH unless ( $tok =~ /[A-Za-z]/ );
+ }
+
+ # do not align parens unless patterns match;
+ # large ugly spaces can occur in math expressions.
+ elsif ( $alignment_token eq '(' ) {
+
+ # But we can allow a match if the parens don't
+ # require any padding.
+ $GoToMsg = "do not align '(' unless patterns match or pad=0";
+ if ( $pad != 0 ) { goto NO_MATCH }
+ }
+
+ # Handle an '=' alignment with different patterns to
+ # the left.
+ elsif ( $alignment_token eq '=' ) {
+
+ # It is best to be a little restrictive when
+ # aligning '=' tokens. Here is an example of
+ # two lines that we will not align:
+ # my $variable=6;
+ # $bb=4;
+ # The problem is that one is a 'my' declaration,
+ # and the other isn't, so they're not very similar.
+ # We will filter these out by comparing the first
+ # letter of the pattern. This is crude, but works
+ # well enough.
+ if ( substr( $pat_m, 0, 1 ) ne substr( $pat, 0, 1 ) ) {
+ $GoToMsg = "first character before equals differ";
+ goto NO_MATCH;
+ }
+
+ # The introduction of sub 'prune_alignment_tree'
+ # enabled alignment of lists left of the equals with
+ # other scalar variables. For example:
+ # my ( $D, $s, $e ) = @_;
+ # my $d = length $D;
+ # my $c = $e - $s - $d;
+
+ # But this would change formatting of a lot of scripts,
+ # so for now we prevent alignment of comma lists on the
+ # left with scalars on the left. We will also prevent
+ # any partial alignments.
+
+ # set return code 2 if the = is at line level, but
+ # set return code 1 if the = is below line level, i.e.
+ # sub new { my ( $p, $v ) = @_; bless \$v, $p }
+ # sub iter { my ($x) = @_; return undef if $$x < 0; return $$x--; }
+
+ elsif (
+ ( index( $pat_m, ',' ) >= 0 ) ne ( index( $pat, ',' ) >= 0 ) )
+ {
+ $GoToMsg = "mixed commas/no-commas before equals";
+ if ( $lev eq $group_level ) {
+ $return_code = 2;
+ }
+ goto NO_MATCH;
+ }
+ }
+
+ MATCH:
+ return ( 0, \$GoToMsg );
+
+ NO_MATCH:
+
+ EXPLAIN_COMPARE_PATTERNS
+ && print STDERR "no match because $GoToMsg\n";
+
+ return ( $return_code, \$GoToMsg );
+
+ }; ## end of $compare_patterns->()
+
+ # loop over subgroups
+ foreach my $item ( @{$rsubgroups} ) {
+ my ( $jbeg, $jend ) = @{$item};
+ my $nlines = $jend - $jbeg + 1;
+ next unless ( $nlines > 1 );
+
+ # loop over lines in a subgroup
+ for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
+
+ $line_m = $line;
+ $rtokens_m = $rtokens;
+ $rpatterns_m = $rpatterns;
+ $rfield_lengths_m = $rfield_lengths;
+ $imax_m = $imax;
+ $list_type_m = $list_type;
+ $ci_level_m = $ci_level;
+
+ $line = $rnew_lines->[$jj];
+ $rtokens = $line->get_rtokens();
+ $rpatterns = $line->get_rpatterns();
+ $rfield_lengths = $line->get_rfield_lengths();
+ $imax = @{$rtokens} - 2;
+ $list_type = $line->get_list_type();
+ $ci_level = $line->get_ci_level();
+
+ # nothing to do for first line
+ next if ( $jj == $jbeg );
+
+ my $ci_jump = $ci_level - $ci_level_m;
+
+ my $imax_min = $imax_m < $imax ? $imax_m : $imax;
+
+ my $imax_align = -1;
- return unless @{$rlines};
- my $has_terminal_match = $rlines->[-1]->get_j_terminal_match();
+ # find number of leading common tokens
- # ignore hanging side comments in these operations
- my @filtered = grep { !$_->{_is_hanging_side_comment} } @{$rlines};
- my $rnew_lines = \@filtered;
- my @i_equals;
- my @min_levels;
+ #################################
+ # No match to hanging side comment
+ #################################
+ if ( $line->get_is_hanging_side_comment() ) {
- my $jmax = @{$rnew_lines} - 1;
+ # Should not get here; HSC's have been filtered out
+ $imax_align = -1;
+ }
+
+ ##############################
+ # Handle comma-separated lists
+ ##############################
+ elsif ( $list_type && $list_type eq $list_type_m ) {
+
+ # do not align lists across a ci jump with new list method
+ if ($ci_jump) { $imax_min = -1 }
+
+ my $i_nomatch = $imax_min + 1;
+ for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
+ my $tok = $rtokens->[$i];
+ my $tok_m = $rtokens_m->[$i];
+ if ( $tok ne $tok_m ) {
+ $i_nomatch = $i;
+ last;
+ }
+ }
- my %is_good_tok;
+ $imax_align = $i_nomatch - 1;
+ }
- # create a hash of tokens for each line
- my $rline_hashes = [];
- foreach my $line ( @{$rnew_lines} ) {
- my $rhash = {};
+ ##################
+ # Handle non-lists
+ ##################
+ else {
+ my $i_nomatch = $imax_min + 1;
+ for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
+ my $tok = $rtokens->[$i];
+ my $tok_m = $rtokens_m->[$i];
+ if ( $tok ne $tok_m ) {
+ $i_nomatch = $i;
+ last;
+ }
+
+ my $pat = $rpatterns->[$i];
+ my $pat_m = $rpatterns_m->[$i];
+
+ # If patterns don't match, we have to be careful...
+ if ( $pat_m ne $pat ) {
+ my $pad =
+ $rfield_lengths->[$i] - $rfield_lengths_m->[$i];
+ my ( $match_code, $rmsg ) = $compare_patterns->(
+ $tok, $tok_m, $pat, $pat_m, $pad
+ );
+ if ($match_code) {
+ if ( $match_code eq 1 ) { $i_nomatch = $i }
+ elsif ( $match_code eq 2 ) { $i_nomatch = 0 }
+ last;
+ }
+ }
+ }
+ $imax_align = $i_nomatch - 1;
+ }
+
+ $line_m->set_imax_pair($imax_align);
+
+ } ## end loop over lines
+
+ # Put fence at end of subgroup
+ $line->set_imax_pair(-1);
+
+ } ## end loop over subgroups
+
+ # if there are hanging side comments, propagate the pair info down to them
+ # so that lines can just look back one line for their pair info.
+ if ( @{$rlines} > @{$rnew_lines} ) {
+ my $last_pair_info = -1;
+ foreach my $line ( @{$rlines} ) {
+ if ( $line->get_is_hanging_side_comment() ) {
+ $line->set_imax_pair($last_pair_info);
+ }
+ else {
+ $last_pair_info = $line->get_imax_pair();
+ }
+ }
+ }
+ return;
+}
+
+sub fat_comma_to_comma {
+ my ($str) = @_;
+
+ # We are changing '=>' to ',' and removing any trailing decimal count
+ # because currently fat commas have a count and commas do not.
+ # For example, we will change '=>2+{-3.2' into ',2+{-3'
+ if ( $str =~ /^=>([^\.]*)/ ) { $str = ',' . $1 }
+ return $str;
+}
+
+sub get_line_token_info {
+
+ # scan lines of tokens and return summary information about the range of
+ # levels and patterns.
+ my ($rlines) = @_;
+
+ # First scan to check monotonicity. Here is an example of several
+ # lines which are monotonic. The = is the lowest level, and
+ # the commas are all one level deeper. So this is not nonmonotonic.
+ # $$d{"weeks"} = [ "w", "wk", "wks", "week", "weeks" ];
+ # $$d{"days"} = [ "d", "day", "days" ];
+ # $$d{"hours"} = [ "h", "hr", "hrs", "hour", "hours" ];
+ my @all_token_info;
+ my $all_monotonic = 1;
+ for ( my $jj = 0 ; $jj < @{$rlines} ; $jj++ ) {
+ my ($line) = $rlines->[$jj];
my $rtokens = $line->get_rtokens();
- my $i = 0;
- my $i_eq;
- my $lev_min;
+ my $last_lev;
+ my $is_monotonic = 1;
+ my $i = -1;
foreach my $tok ( @{$rtokens} ) {
+ $i++;
my ( $raw_tok, $lev, $tag, $tok_count ) =
decode_alignment_token($tok);
- if ( !defined($lev_min) || $lev < $lev_min ) { $lev_min = $lev }
+ push @{ $all_token_info[$jj] },
+ [ $raw_tok, $lev, $tag, $tok_count ];
+ last if ( $tok eq '#' );
+ if ( $i > 0 && $lev < $last_lev ) { $is_monotonic = 0 }
+ $last_lev = $lev;
+ }
+ if ( !$is_monotonic ) { $all_monotonic = 0 }
+ }
- # Possible future upgrade: for multiple matches,
- # record [$i1, $i2, ..] instead of $i
- $rhash->{$tok} =
- [ $i, undef, undef, $raw_tok, $lev, $tag, $tok_count ];
+ my $rline_values = [];
+ for ( my $jj = 0 ; $jj < @{$rlines} ; $jj++ ) {
+ my ($line) = $rlines->[$jj];
- # remember the first equals at line level
- if ( !defined($i_eq) && $raw_tok eq '=' ) {
- if ( $lev eq $group_level ) { $i_eq = $i }
+ my $rtokens = $line->get_rtokens();
+ my $i = -1;
+ my ( $lev_min, $lev_max );
+ my $token_pattern_max = "";
+ my %saw_level;
+ my @token_info;
+ my $is_monotonic = 1;
+
+ # find the index of the last token before the side comment
+ my $imax = @{$rtokens} - 2;
+ my $imax_true = $imax;
+
+ # If the entire group is monotonic, and the line ends in a comma list,
+ # walk it back to the first such comma. this will have the effect of
+ # making all trailing ragged comma lists match in the prune tree
+ # routine. these trailing comma lists can better be handled by later
+ # alignment rules.
+
+ # Treat fat commas the same as commas here by converting them to
+ # commas. This will improve the chance of aligning the leading parts
+ # of ragged lists.
+
+ my $tok_end = fat_comma_to_comma( $rtokens->[$imax] );
+ if ( $all_monotonic && $tok_end =~ /^,/ ) {
+ my $i = $imax - 1;
+ while ( $i >= 0
+ && fat_comma_to_comma( $rtokens->[$i] ) eq $tok_end )
+ {
+ $imax = $i;
+ $i--;
}
- $i++;
}
- push @{$rline_hashes}, $rhash;
- push @i_equals, $i_eq;
- push @min_levels, $lev_min;
- }
-
- # compare each line pair and record matches
- my $rtok_hash = {};
- my $nr = 0;
- for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) {
- my $nl = $nr;
- $nr = 0;
- my $jr = $jl + 1;
- my $rhash_l = $rline_hashes->[$jl];
- my $rhash_r = $rline_hashes->[$jr];
- my $count = 0; # UNUSED NOW?
- my $ntoks = 0;
- foreach my $tok ( keys %{$rhash_l} ) {
- $ntoks++;
- if ( defined( $rhash_r->{$tok} ) ) {
- if ( $tok ne '#' ) { $count++; }
- my $il = $rhash_l->{$tok}->[0];
- my $ir = $rhash_r->{$tok}->[0];
- $rhash_l->{$tok}->[2] = $ir;
- $rhash_r->{$tok}->[1] = $il;
- if ( $tok ne '#' ) {
- push @{ $rtok_hash->{$tok} }, ( $jl, $jr );
- $nr++;
- }
+
+ # make a first pass to find level range
+ my $last_lev;
+ foreach my $tok ( @{$rtokens} ) {
+ $i++;
+ last if ( $i > $imax );
+ last if ( $tok eq '#' );
+ my ( $raw_tok, $lev, $tag, $tok_count ) =
+ @{ $all_token_info[$jj]->[$i] };
+
+ last if ( $tok eq '#' );
+ $token_pattern_max .= $tok;
+ $saw_level{$lev}++;
+ if ( !defined($lev_min) ) {
+ $lev_min = $lev;
+ $lev_max = $lev;
+ }
+ else {
+ if ( $lev < $lev_min ) { $lev_min = $lev; }
+ if ( $lev > $lev_max ) { $lev_max = $lev; }
+ if ( $lev < $last_lev ) { $is_monotonic = 0 }
}
+ $last_lev = $lev;
}
- # Set a line break if no matching tokens between these lines
- if ( $nr == 0 && $nl > 0 ) {
- $rnew_lines->[$jl]->{_end_group} = 1;
+ # handle no levels
+ my $rtoken_patterns = {};
+ my $rtoken_indexes = {};
+ my @levs = sort keys %saw_level;
+ if ( !defined($lev_min) ) {
+ $lev_min = -1;
+ $lev_max = -1;
+ $levs[0] = -1;
+ $rtoken_patterns->{$lev_min} = "";
+ $rtoken_indexes->{$lev_min} = [];
}
- }
- # find subgroups
- my @subgroups;
- push @subgroups, [ 0, $jmax ];
- for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) {
- if ( $rnew_lines->[$jl]->{_end_group} ) {
- $subgroups[-1]->[1] = $jl;
- push @subgroups, [ $jl + 1, $jmax ];
+ # handle one level
+ elsif ( $lev_max == $lev_min ) {
+ $rtoken_patterns->{$lev_max} = $token_pattern_max;
+ $rtoken_indexes->{$lev_max} = [ ( 0 .. $imax ) ];
}
- }
- # Loop to process each subgroups
- foreach my $item (@subgroups) {
- my ( $jbeg, $jend ) = @{$item};
+ # handle multiple levels
+ else {
+ $rtoken_patterns->{$lev_max} = $token_pattern_max;
+ $rtoken_indexes->{$lev_max} = [ ( 0 .. $imax ) ];
- # look for complete ternary or if/elsif/else blocks
- my $nlines = $jend - $jbeg + 1;
- my %token_line_count;
- for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
- my %seen;
- my $line = $rnew_lines->[$jj];
- my $rtokens = $line->get_rtokens();
+ my $debug = 0;
+ my $lev_top = pop @levs; # alread did max level
+ my $itok = -1;
foreach my $tok ( @{$rtokens} ) {
- if ( !$seen{$tok} ) {
- $seen{$tok}++;
- $token_line_count{$tok}++;
+ $itok++;
+ last if ( $itok > $imax );
+ my ( $raw_tok, $lev, $tag, $tok_count ) =
+ @{ $all_token_info[$jj]->[$itok] };
+ last if ( $raw_tok eq '#' );
+ foreach my $lev_test (@levs) {
+ next if ( $lev > $lev_test );
+ $rtoken_patterns->{$lev_test} .= $tok;
+ push @{ $rtoken_indexes->{$lev_test} }, $itok;
}
}
+ push @levs, $lev_top;
}
- # Look for if/else/elsif and ternary blocks
- my $is_full_block;
- foreach my $tok ( keys %token_line_count ) {
- if ( $token_line_count{$tok} == $nlines ) {
- if ( $tok =~ /^\?/ || $tok =~ /^\{\d+if/ ) {
- $is_full_block = 1;
- }
+ push @{$rline_values},
+ [
+ $lev_min, $lev_max, $rtoken_patterns, \@levs,
+ $rtoken_indexes, $is_monotonic, $imax_true, $imax,
+ ];
+
+ # debug
+ 0 && do {
+ local $" = ')(';
+ print "lev_min=$lev_min, lev_max=$lev_max, levels=(@levs)\n";
+ foreach my $key ( sort keys %{$rtoken_patterns} ) {
+ print "$key => $rtoken_patterns->{$key}\n";
+ print "$key => @{$rtoken_indexes->{$key}}\n";
}
+ };
+ } ## end loop over lines
+ return ( $rline_values, $all_monotonic );
+}
+
+sub prune_alignment_tree {
+ my ($rlines) = @_;
+ my $jmax = @{$rlines} - 1;
+ return unless $jmax > 0;
+
+ # Vertical alignment in perltidy is done as an iterative process. The
+ # starting point is to mark all possible alignment tokens ('=', ',', '=>',
+ # etc) for vertical alignment. Then we have to delete all alignments
+ # which, if actually made, would detract from overall alignment. This
+ # is done in several phases of which this is one.
+
+ # In this routine we look at the alignments of a group of lines as a
+ # hierarchical tree. We will 'prune' the tree to limited depths if that
+ # will improve overall alignment at the lower depths.
+ # For each line we will be looking at its alignment patterns down to
+ # different fixed depths. For each depth, we include all lower depths and
+ # ignore all higher depths. We want to see if we can get alignment of a
+ # larger group of lines if we ignore alignments at some lower depth.
+ # Here is an # example:
+
+ # for (
+ # [ '$var', sub { join $_, "bar" }, 0, "bar" ],
+ # [ 'CONSTANT', sub { join "foo", "bar" }, 0, "bar" ],
+ # [ 'CONSTANT', sub { join "foo", "bar", 3 }, 1, "barfoo3" ],
+ # [ '$myvar', sub { my $var; join $var, "bar" }, 0, "bar" ],
+ # );
+
+ # In the above example, all lines have three commas at the lowest depth
+ # (zero), so if there were no other alignements, these lines would all
+ # align considering only the zero depth alignment token. But some lines
+ # have additional comma alignments at the next depth, so we need to decide
+ # if we should drop those to keep the top level alignments, or keep those
+ # for some additional low level alignments at the expense losing some top
+ # level alignments. In this case we will drop the deeper level commas to
+ # keep the entire collection aligned. But in some cases the decision could
+ # go the other way.
+
+ # The tree for this example at the zero depth has one node containing
+ # all four lines, since they are identical at zero level (three commas).
+ # At depth one, there are three 'children' nodes, namely:
+ # - lines 1 and 2, which have a single comma in the 'sub' at depth 1
+ # - line 3, which has 2 commas at depth 1
+ # - line4, which has a ';' and a ',' at depth 1
+ # There are no deeper alignments in this example.
+ # so the tree structure for this example is:
+ #
+ # depth 0 depth 1 depth 2
+ # [lines 1-4] -- [line 1-2] - (empty)
+ # | [line 3] - (empty)
+ # | [line 4] - (empty)
+
+ # We can carry this to any depth, but it is not really useful to go below
+ # depth 2. To cleanly stop there, we will consider depth 2 to contain all
+ # alignments at depth >=2.
+
+ use constant EXPLAIN_PRUNE => 0;
+
+ ####################################################################
+ # Prune Tree Step 1. Start by scanning the lines and collecting info
+ ####################################################################
+
+ # Note that the caller had this info but we have to redo this now because
+ # alignment tokens may have been deleted.
+ my ( $rline_values, $all_monotonic ) = get_line_token_info($rlines);
+
+ # If all the lines have levels which increase monotonically from left to
+ # right, then the sweep-left-to-right pass can do a better job of alignment
+ # than pruning, and without deleting alignments.
+ return if ($all_monotonic);
+
+ # Contents of $rline_values
+ # [
+ # $lev_min, $lev_max, $rtoken_patterns, \@levs,
+ # $rtoken_indexes, $is_monotonic, $imax_true, $imax,
+ # ];
+
+ # We can work to any depth, but there is little advantage to working
+ # to a a depth greater than 2
+ my $MAX_DEPTH = 2;
+
+ # This arrays will hold the tree of alignment tokens at different depths
+ # for these lines.
+ my @match_tree;
+
+ # Tree nodes contain these values:
+ # $match_tree[$depth] = [$jbeg, $jend, $n_parent, $level, $pattern,
+ # $nc_beg_p, $nc_end_p, $rindexes];
+ # where
+ # $depth = 0,1,2 = index of depth of the match
+
+ # $jbeg beginning index j of the range of lines in this match
+ # $jend ending index j of the range of lines in this match
+ # $n_parent = index of the containing group at $depth-1, if it exists
+ # $level = actual level of code being matched in this group
+ # $pattern = alignment pattern being matched
+ # $nc_beg_p = first child
+ # $nc_end_p = last child
+ # $rindexes = ref to token indexes
+
+ # the patterns and levels of the current group being formed at each depth
+ my ( @token_patterns_current, @levels_current, @token_indexes_current );
+
+ # the patterns and levels of the next line being tested at each depth
+ my ( @token_patterns_next, @levels_next, @token_indexes_next );
+
+ #########################################################
+ # define a recursive worker subroutine for tree construction
+ #########################################################
+
+ # This is a recursive routine which is called if a match condition changes
+ # at any depth when a new line is encountered. It ends the match node
+ # which changed plus all deeper nodes attached to it.
+ my $end_node;
+ $end_node = sub {
+ my ( $depth, $jl, $n_parent ) = @_;
+
+ # $depth is the tree depth
+ # $jl is the index of the line
+ # $n_parent is index of the parent node of this node
+
+ return if ( $depth > $MAX_DEPTH );
+
+ # end any current group at this depth
+ if ( $jl >= 0
+ && defined( $match_tree[$depth] )
+ && @{ $match_tree[$depth] }
+ && defined( $levels_current[$depth] ) )
+ {
+ $match_tree[$depth]->[-1]->[1] = $jl;
}
- # remove unwanted alignment tokens
- for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
- my $line = $rnew_lines->[$jj];
- my $rtokens = $line->get_rtokens();
- my $rhash = $rline_hashes->[$jj];
- my $i = 0;
- my $i_eq = $i_equals[$jj];
- my @idel;
- my $imax = @{$rtokens} - 2;
- my $delete_above_level;
+ # Define the index of the node we will create below
+ my $ng_self = 0;
+ if ( defined( $match_tree[$depth] ) ) {
+ $ng_self = @{ $match_tree[$depth] };
+ }
- for ( my $i = 0 ; $i <= $imax ; $i++ ) {
- my $tok = $rtokens->[$i];
- next if ( $tok eq '#' ); # shouldn't happen
- my ( $iii, $il, $ir, $raw_tok, $lev, $tag, $tok_count ) =
- @{ $rhash->{$tok} };
-
- # always remove unmatched tokens
- my $delete_me = !defined($il) && !defined($ir);
-
- # also, if this is a complete ternary or if/elsif/else block,
- # remove all alignments which are not also in every line
- $delete_me ||=
- ( $is_full_block && $token_line_count{$tok} < $nlines );
-
- # Remove all tokens above a certain level following a previous
- # deletion. For example, we have to remove tagged higher level
- # alignment tokens following a => deletion because the tags of
- # higher level tokens will now be incorrect. For example, this
- # will prevent aligning commas as follows after deleting the
- # second =>
- # $w->insert(
- # ListBox => origin => [ 270, 160 ],
- # size => [ 200, 55 ],
- # );
- if ( defined($delete_above_level) ) {
- if ( $lev > $delete_above_level ) {
- $delete_me ||= 1; #$tag;
- }
- else { $delete_above_level = undef }
- }
+ # end any next deeper child node(s)
+ $end_node->( $depth + 1, $jl, $ng_self );
- if (
- $delete_me
- && is_deletable_token( $tok, $i, $imax, $jj, $i_eq )
+ # update the levels being matched
+ $token_patterns_current[$depth] = $token_patterns_next[$depth];
+ $token_indexes_current[$depth] = $token_indexes_next[$depth];
+ $levels_current[$depth] = $levels_next[$depth];
- # Patch: do not touch the first line of a terminal match,
- # such as below, because j_terminal has already been set.
- # if ($tag) { $tago = "<$tag>"; $tagc = "</$tag>"; }
- # else { $tago = $tagc = ''; }
- # But see snippets 'else1.t' and 'else2.t'
- && !( $jj == $jbeg && $has_terminal_match && $nlines == 2 )
+ # Do not start a new group at this level if it is not being used
+ if ( !defined( $levels_next[$depth] )
+ || $depth > 0
+ && $levels_next[$depth] <= $levels_next[ $depth - 1 ] )
+ {
+ return;
+ }
- )
- {
- push @idel, $i;
- if ( !defined($delete_above_level)
- || $lev < $delete_above_level )
- {
+ # Create a node for the next group at this depth. We initially assume
+ # that it will continue to $jmax, and correct that later if the node
+ # ends earlier.
+ push @{ $match_tree[$depth] },
+ [
+ $jl + 1, $jmax, $n_parent, $levels_current[$depth],
+ $token_patterns_current[$depth],
+ undef, undef, $token_indexes_current[$depth],
+ ];
- # delete all following higher level alignments
- $delete_above_level = $lev;
+ return;
+ }; ## end sub end_node
+
+ ######################################################
+ # Prune Tree Step 2. Loop to form the tree of matches.
+ ######################################################
+ for ( my $jp = 0 ; $jp <= $jmax ; $jp++ ) {
+
+ # working with two adjacent line indexes, 'm'=minus, 'p'=plus
+ my $jm = $jp - 1;
+
+ # Pull out needed values for the next line
+ my ( $lev_min, $lev_max, $rtoken_patterns, $rlevs, $rtoken_indexes,
+ $is_monotonic, $imax_true, $imax )
+ = @{ $rline_values->[$jp] };
+
+ # Transfer levels and patterns for this line to the working arrays.
+ # If the number of levels differs from our chosen MAX_DEPTH ...
+ # if fewer than MAX_DEPTH: leave levels at missing depths undefined
+ # if more than MAX_DEPTH: set the MAX_DEPTH level to be the maximum
+ @levels_next = @{$rlevs}[ 0 .. $MAX_DEPTH ];
+ if ( @{$rlevs} > $MAX_DEPTH ) {
+ $levels_next[$MAX_DEPTH] = $rlevs->[-1];
+ }
+ my $depth = 0;
+ foreach (@levels_next) {
+ $token_patterns_next[$depth] =
+ defined($_) ? $rtoken_patterns->{$_} : undef;
+ $token_indexes_next[$depth] =
+ defined($_) ? $rtoken_indexes->{$_} : undef;
+ $depth++;
+ }
- # but keep deleting after => to next lower level
- # to avoid some bizarre alignments
- if ( $raw_tok eq '=>' ) {
- $delete_above_level = $lev - 1;
- }
+ # Look for a change in match groups...
+
+ # Initialize on the first line
+ if ( $jp == 0 ) {
+ my $n_parent;
+ $end_node->( 0, $jm, $n_parent );
+ }
+
+ # End groups if a hard flag has been set
+ elsif ( $rlines->[$jm]->get_end_group() ) {
+ my $n_parent;
+ $end_node->( 0, $jm, $n_parent );
+ }
+
+ # Continue at hanging side comment
+ elsif ( $rlines->[$jp]->get_is_hanging_side_comment() ) {
+ next;
+ }
+
+ # Otherwise see if anything changed and update the tree if so
+ else {
+ foreach my $depth ( 0 .. $MAX_DEPTH ) {
+
+ my $def_current = defined( $token_patterns_current[$depth] );
+ my $def_next = defined( $token_patterns_next[$depth] );
+ last unless ( $def_current || $def_next );
+ if ( !$def_current
+ || !$def_next
+ || $token_patterns_current[$depth] ne
+ $token_patterns_next[$depth] )
+ {
+ my $n_parent;
+ if ( $depth > 0 && defined( $match_tree[ $depth - 1 ] ) ) {
+ $n_parent = @{ $match_tree[ $depth - 1 ] } - 1;
}
+ $end_node->( $depth, $jm, $n_parent );
+ last;
}
}
+ }
+ } ## end loop to form tree of matches
+
+ ##########################################################
+ # Prune Tree Step 3. Make links from parent to child nodes
+ ##########################################################
+
+ # It seemed cleaner to do this as a separate step rather than during tree
+ # construction. The children nodes have links up to the parent node which
+ # created them. Now make links in the opposite direction, so the parents
+ # can find the children. We store the range of children nodes ($nc_beg,
+ # $nc_end) of each parent with two additional indexes in the orignal array.
+ # These will be undef if no children.
+ for ( my $depth = $MAX_DEPTH ; $depth > 0 ; $depth-- ) {
+ next unless defined( $match_tree[$depth] );
+ my $nc_max = @{ $match_tree[$depth] } - 1;
+ my $np_now;
+ foreach my $nc ( 0 .. $nc_max ) {
+ my $np = $match_tree[$depth]->[$nc]->[2];
+ if ( !defined($np) ) {
+
+ # shouldn't happen
+ #print STDERR "lost child $np at depth $depth\n";
+ next;
+ }
+ if ( !defined($np_now) || $np != $np_now ) {
+ $np_now = $np;
+ $match_tree[ $depth - 1 ]->[$np]->[5] = $nc;
+ }
+ $match_tree[ $depth - 1 ]->[$np]->[6] = $nc;
+ }
+ } ## end loop to make links down to the child nodes
- if (@idel) { delete_selected_tokens( $line, \@idel ) }
+ EXPLAIN_PRUNE > 0 && do {
+ print "Tree complete. Found these groups:\n";
+ foreach my $depth ( 0 .. $MAX_DEPTH ) {
+ Dump_tree_groups( \@{ $match_tree[$depth] }, "depth=$depth" );
}
- } # End loop over subgroups
+ };
+
+ #######################################################
+ # Prune Tree Step 4. Make a list of nodes to be deleted
+ #######################################################
+
+ # list of lines with tokens to be deleted:
+ # [$jbeg, $jend, $level_keep]
+ # $jbeg..$jend is the range of line indexes,
+ # $level_keep is the minimum level to keep
+ my @delete_list;
+
+ # Groups with ending comma lists and their range of sizes:
+ # $ragged_comma_group{$id} = [ imax_group_min, imax_group_max ]
+ my %ragged_comma_group;
+ # Define a threshold line count for forcing a break
+ my $nlines_break = 3;
+
+ # We work with a list of nodes to visit at the next deeper depth.
+ my @todo_list;
+ if ( defined( $match_tree[0] ) ) {
+ @todo_list = ( 0 .. @{ $match_tree[0] } - 1 );
+ }
+
+ for ( my $depth = 0 ; $depth <= $MAX_DEPTH ; $depth++ ) {
+ last unless (@todo_list);
+ my @todo_next;
+ foreach my $np (@todo_list) {
+ my ( $jbeg_p, $jend_p, $np_p, $lev_p, $pat_p, $nc_beg_p, $nc_end_p,
+ $rindexes_p )
+ = @{ $match_tree[$depth]->[$np] };
+ my $nlines_p = $jend_p - $jbeg_p + 1;
+
+ # nothing to do if no children
+ next unless defined($nc_beg_p);
+
+ # Define the number of lines to either keep or delete a child node.
+ # This is the key decision we have to make. We want to delete
+ # short runs of matched lines, and keep long runs. It seems easier
+ # for the eye to follow breaks in monotonic level changes than
+ # non-monotonic level changes. For example, the following looks
+ # best if we delete the lower level alignments:
+
+ # [1] ~~ [];
+ # [ ["foo"], ["bar"] ] ~~ [ qr/o/, qr/a/ ];
+ # [ qr/o/, qr/a/ ] ~~ [ ["foo"], ["bar"] ];
+ # [ "foo", "bar" ] ~~ [ qr/o/, qr/a/ ];
+ # [ qr/o/, qr/a/ ] ~~ [ "foo", "bar" ];
+ # $deep1 ~~ $deep1;
+
+ # So we will use two thresholds.
+ my $nmin_mono = $depth + 2;
+ my $nmin_non_mono = $depth + 6;
+ if ( $nmin_mono > $nlines_p - 1 ) {
+ $nmin_mono = $nlines_p - 1;
+ }
+ if ( $nmin_non_mono > $nlines_p - 1 ) {
+ $nmin_non_mono = $nlines_p - 1;
+ }
+
+ # loop to keep or delete each child node
+ foreach my $nc ( $nc_beg_p .. $nc_end_p ) {
+ my ( $jbeg_c, $jend_c, $np_c, $lev_c, $pat_c, $nc_beg_c,
+ $nc_end_c )
+ = @{ $match_tree[ $depth + 1 ]->[$nc] };
+ my $nlines_c = $jend_c - $jbeg_c + 1;
+ my $is_monotonic = $rline_values->[$jbeg_c]->[5];
+ my $nmin = $is_monotonic ? $nmin_mono : $nmin_non_mono;
+ if ( $nlines_c < $nmin ) {
+##print "deleting child, nlines=$nlines_c, nmin=$nmin\n";
+ push @delete_list, [ $jbeg_c, $jend_c, $lev_p ];
+ }
+ else {
+##print "keeping child, nlines=$nlines_c, nmin=$nmin\n";
+ push @todo_next, $nc;
+ }
+ }
+ }
+ @todo_list = @todo_next;
+ } ## end loop to mark nodes to delete
+
+ #############################################################
+ # Prune Tree Step 5. Loop to delete selected alignment tokens
+ #############################################################
+ foreach my $item (@delete_list) {
+ my ( $jbeg, $jend, $level_keep ) = @{$item};
+ foreach my $jj ( $jbeg .. $jend ) {
+ my $line = $rlines->[$jj];
+ my @idel;
+ my $rtokens = $line->get_rtokens();
+ my $imax = @{$rtokens} - 2;
+ for ( my $i = 0 ; $i <= $imax ; $i++ ) {
+ my $tok = $rtokens->[$i];
+ my ( $raw_tok, $lev, $tag, $tok_count ) =
+ decode_alignment_token($tok);
+ if ( $lev > $level_keep ) {
+ push @idel, $i;
+ }
+ }
+ if (@idel) {
+ delete_selected_tokens( $line, \@idel );
+ }
+ }
+ } ## end loop to delete selected alignment tokens
+
+ return;
+} ## end sub prune_alignment_tree
+
+sub Dump_tree_groups {
+ my ( $rgroup, $msg ) = @_;
+ print "$msg\n";
+ local $" = ')(';
+ foreach my $item ( @{$rgroup} ) {
+ my @fix = @{$item};
+ foreach (@fix) { $_ = "undef" unless defined $_; }
+ $fix[4] = "...";
+ print "(@fix)\n";
+ }
return;
}
-{ # decide_if_aligned_pair
+{ ## closure for sub is_marginal_match
my %is_if_or;
my %is_assignment;
+ my %is_good_alignment;
+
+ # This test did not give sufficiently better results to use as an update,
+ # but the flag is worth keeping as a starting point for future testing.
+ use constant TEST_MARGINAL_EQ_ALIGNMENT => 0;
BEGIN {
my @q = qw(
- if or ||
+ if unless or ||
);
@is_if_or{@q} = (1) x scalar(@q);
x=
);
@is_assignment{@q} = (1) x scalar(@q);
+
+ # Vertically aligning on certain "good" tokens is usually okay
+ # so we can be less restrictive in marginal cases.
+ @q = qw( { ? => = );
+ push @q, (',');
+ @is_good_alignment{@q} = (1) x scalar(@q);
}
- sub decide_if_aligned_pair {
+ sub is_marginal_match {
- # Do not try to align two lines which are not really similar
- return unless ( @group_lines == 2 );
- return if ($is_matching_terminal_line);
+ my ( $line_0, $line_1, $group_level, $imax_align, $imax_prev ) = @_;
- # always align lists
- my $group_list_type = $group_lines[0]->get_list_type();
- return 0 if ($group_list_type);
+ # Decide if we should undo some or all of the common alignments of a
+ # group of just two lines.
+
+ # Given:
+ # $line_0 and $line_1 - the two lines
+ # $group_level = the indentation level of the group being processed
+ # $imax_align = the maximum index of the common alignment tokens
+ # of the two lines
+ # $imax_prev = the maximum index of the common alignment tokens
+ # with the line before $line_0 (=-1 of does not exist)
- my $jmax0 = $group_lines[0]->get_jmax();
- my $jmax1 = $group_lines[1]->get_jmax();
- my $rtokens = $group_lines[0]->get_rtokens();
- my $leading_equals = ( $rtokens->[0] =~ /=/ );
+ # Return:
+ # $is_marginal = true if the two lines should NOT be fully aligned
+ # = false if the two lines can remain fully aligned
+ # $imax_align = the index of the highest alignment token shared by
+ # these two lines to keep if the match is marginal.
- # scan the tokens on the second line
- my $rtokens1 = $group_lines[1]->get_rtokens();
- my $saw_if_or; # if we saw an 'if' or 'or' at group level
+ # When we have an alignment group of just two lines like this, we are
+ # working in the twilight zone of what looks good and what looks bad.
+ # This routine is a collection of rules which work have been found to
+ # work fairly well, but it will need to be updated from time to time.
+
+ my $is_marginal = 0;
+
+ # always keep alignments of a terminal else or ternary
+ goto RETURN if ( defined( $line_1->get_j_terminal_match() ) );
+
+ # always align lists
+ my $group_list_type = $line_0->get_list_type();
+ goto RETURN if ($group_list_type);
+
+ # always align hanging side comments
+ my $is_hanging_side_comment = $line_1->get_is_hanging_side_comment();
+ goto RETURN if ($is_hanging_side_comment);
+
+ my $jmax_0 = $line_0->get_jmax();
+ my $jmax_1 = $line_1->get_jmax();
+ my $rtokens_1 = $line_1->get_rtokens();
+ my $rtokens_0 = $line_0->get_rtokens();
+ my $rfield_lengths_0 = $line_0->get_rfield_lengths();
+ my $rfield_lengths_1 = $line_1->get_rfield_lengths();
+ my $rpatterns_0 = $line_0->get_rpatterns();
+ my $rpatterns_1 = $line_1->get_rpatterns();
+ my $imax_next = $line_1->get_imax_pair();
+
+ # We will scan the alignment tokens and set a flag '$is_marginal' if
+ # it seems that the an alignment would look bad.
+ my $max_pad = 0;
+ my $saw_good_alignment = 0;
+ my $saw_if_or; # if we saw an 'if' or 'or' at group level
my $raw_tokb = ""; # first token seen at group level
- for ( my $j = 0 ; $j < $jmax1 - 1 ; $j++ ) {
+ my $jfirst_bad;
+ my $line_ending_fat_comma; # is last token just a '=>' ?
+ my $j0_eq_pad;
+ my $j0_max_pad = 0;
+
+ for ( my $j = 0 ; $j < $jmax_1 - 1 ; $j++ ) {
my ( $raw_tok, $lev, $tag, $tok_count ) =
- decode_alignment_token( $rtokens1->[$j] );
+ decode_alignment_token( $rtokens_1->[$j] );
if ( $raw_tok && $lev == $group_level ) {
if ( !$raw_tokb ) { $raw_tokb = $raw_tok }
$saw_if_or ||= $is_if_or{$raw_tok};
}
- }
- # A marginal match is a match which has different patterns. Normally,
- # we should not allow exactly two lines to match if marginal. But
- # we can allow matching in some specific cases.
- my $is_marginal = $marginal_match;
+ # When the first of the two lines ends in a bare '=>' this will
+ # probably be marginal match. (For a bare =>, the next field length
+ # will be 2 or 3, depending on side comment)
+ $line_ending_fat_comma =
+ $j == $jmax_1 - 2
+ && $raw_tok eq '=>'
+ && $rfield_lengths_0->[ $j + 1 ] <= 3;
+
+ my $pad = $rfield_lengths_1->[$j] - $rfield_lengths_0->[$j];
+ if ( $j == 0 ) {
+ $pad += $line_1->get_leading_space_count() -
+ $line_0->get_leading_space_count();
+
+ # Remember the pad at a leading equals
+ if ( $raw_tok eq '=' && $lev == $group_level ) {
+ $j0_eq_pad = $pad;
+ $j0_max_pad =
+ 0.5 * ( $rfield_lengths_1->[0] + $rfield_lengths_0->[0] );
+ $j0_max_pad = 4 if ( $j0_max_pad < 4 );
+ }
+ }
- # lines with differing number of alignment tokens are marginal
- $is_marginal ||=
- $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
- && !$is_assignment{$raw_tokb};
+ if ( $pad < 0 ) { $pad = -$pad }
+ if ( $pad > $max_pad ) { $max_pad = $pad }
+ if ( $is_good_alignment{$raw_tok} && !$line_ending_fat_comma ) {
+ $saw_good_alignment = 1;
+ }
+ else {
+ $jfirst_bad = $j unless defined($jfirst_bad);
+ }
+ if ( $rpatterns_0->[$j] ne $rpatterns_1->[$j] ) {
+
+ # Flag this as a marginal match since patterns differ.
+ # Normally, we will not allow just two lines to match if
+ # marginal. But we can allow matching in some specific cases.
+
+ $jfirst_bad = $j if ( !defined($jfirst_bad) );
+ $is_marginal = 1 if ( $is_marginal == 0 );
+ if ( $raw_tok eq '=' ) {
+
+ # Here is an example of a marginal match:
+ # $done{$$op} = 1;
+ # $op = compile_bblock($op);
+ # The left tokens are both identifiers, but
+ # one accesses a hash and the other doesn't.
+ # We'll let this be a tentative match and undo
+ # it later if we don't find more than 2 lines
+ # in the group.
+ $is_marginal = 2;
+ }
+ }
+ }
+
+ $is_marginal = 1 if ( $is_marginal == 0 && $line_ending_fat_comma );
+
+ # Turn off the "marginal match" flag in some cases...
+ # A "marginal match" occurs when the alignment tokens agree
+ # but there are differences in the other tokens (patterns).
+ # If we leave the marginal match flag set, then the rule is that we
+ # will align only if there are more than two lines in the group.
+ # We will turn of the flag if we almost have a match
+ # and either we have seen a good alignment token or we
+ # just need a small pad (2 spaces) to fit. These rules are
+ # the result of experimentation. Tokens which misaligned by just
+ # one or two characters are annoying. On the other hand,
+ # large gaps to less important alignment tokens are also annoying.
+ if ( $is_marginal == 1
+ && ( $saw_good_alignment || $max_pad < 3 ) )
+ {
+ $is_marginal = 0;
+ }
# We will use the line endings to help decide on alignments...
# See if the lines end with semicolons...
- my $rpatterns0 = $group_lines[0]->get_rpatterns();
- my $rpatterns1 = $group_lines[1]->get_rpatterns();
my $sc_term0;
my $sc_term1;
- if ( $jmax0 < 1 || $jmax1 < 1 ) {
+ if ( $jmax_0 < 1 || $jmax_1 < 1 ) {
# shouldn't happen
}
else {
- my $pat0 = $rpatterns0->[ $jmax0 - 1 ];
- my $pat1 = $rpatterns1->[ $jmax1 - 1 ];
+ my $pat0 = $rpatterns_0->[ $jmax_0 - 1 ];
+ my $pat1 = $rpatterns_1->[ $jmax_1 - 1 ];
$sc_term0 = $pat0 =~ /;b?$/;
$sc_term1 = $pat1 =~ /;b?$/;
}
# grep { /$handles/ } $self->_get_delegate_method_list;
$is_marginal ||=
( $raw_tokb eq '(' || $raw_tokb eq '{' )
- && $jmax1 == 2
+ && $jmax_1 == 2
&& $sc_term0 ne $sc_term1;
+ ########################################
+ # return unless this is a marginal match
+ ########################################
+ goto RETURN if ( !$is_marginal );
+
# Undo the marginal match flag in certain cases,
- if ($is_marginal) {
-
- # Two lines with a leading equals-like operator are allowed to
- # align if the patterns to the left of the equals are the same.
- # For example the following two lines are a marginal match but have
- # the same left side patterns, so we will align the equals.
- # my $orig = my $format = "^<<<<< ~~\n";
- # my $abc = "abc";
- # But these have a different left pattern so they will not be
- # aligned
- # $xmldoc .= $`;
- # $self->{'leftovers'} .= "<bx-seq:seq" . $';
-
- # First line semicolon terminated but second not, usually ok:
- # my $want = "'ab', 'a', 'b'";
- # my $got = join( ", ",
- # map { defined($_) ? "'$_'" : "undef" }
- # @got );
- # First line not semicolon terminated, Not OK to match:
- # $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
- # $$href{-NUM_DIRS} = 0;
- my $pat0 = $rpatterns0->[0];
- my $pat1 = $rpatterns1->[0];
-
- ##########################################################
- # Turn off the marginal flag for some types of assignments
- ##########################################################
- if ( $is_assignment{$raw_tokb} ) {
- # undo marginal flag if first line is semicolon terminated
- # and leading patters match
- if ($sc_term0) { # && $sc_term1) {
- $is_marginal = $pat0 ne $pat1;
- }
+ # Two lines with a leading equals-like operator are allowed to
+ # align if the patterns to the left of the equals are the same.
+ # For example the following two lines are a marginal match but have
+ # the same left side patterns, so we will align the equals.
+ # my $orig = my $format = "^<<<<< ~~\n";
+ # my $abc = "abc";
+ # But these have a different left pattern so they will not be
+ # aligned
+ # $xmldoc .= $`;
+ # $self->{'leftovers'} .= "<bx-seq:seq" . $';
+
+ # First line semicolon terminated but second not, usually ok:
+ # my $want = "'ab', 'a', 'b'";
+ # my $got = join( ", ",
+ # map { defined($_) ? "'$_'" : "undef" }
+ # @got );
+ # First line not semicolon terminated, Not OK to match:
+ # $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
+ # $$href{-NUM_DIRS} = 0;
+ my $pat0 = $rpatterns_0->[0];
+ my $pat1 = $rpatterns_1->[0];
+
+ ##########################################################
+ # Turn off the marginal flag for some types of assignments
+ ##########################################################
+ if ( $is_assignment{$raw_tokb} ) {
+
+ # undo marginal flag if first line is semicolon terminated
+ # and leading patters match
+ if ($sc_term0) { # && $sc_term1) {
+ $is_marginal = $pat0 ne $pat1;
}
- elsif ( $raw_tokb eq '=>' ) {
+ }
+ elsif ( $raw_tokb eq '=>' ) {
+
+ # undo marginal flag if patterns match
+ $is_marginal = $pat0 ne $pat1 || $line_ending_fat_comma;
+ }
+ elsif ( $raw_tokb eq '=~' ) {
- # undo marginal flag if patterns match
+ # undo marginal flag if both lines are semicolon terminated
+ # and leading patters match
+ if ( $sc_term1 && $sc_term0 ) {
$is_marginal = $pat0 ne $pat1;
}
- elsif ( $raw_tokb eq '=~' ) {
+ }
- # undo marginal flag if both lines are semicolon terminated
- # and leading patters match
- if ( $sc_term1 && $sc_term0 ) {
- $is_marginal = $pat0 ne $pat1;
- }
+ ######################################################
+ # Turn off the marginal flag if we saw an 'if' or 'or'
+ ######################################################
+
+ # A trailing 'if' and 'or' often gives a good alignment
+ # For example, we can align these:
+ # return -1 if $_[0] =~ m/^CHAPT|APPENDIX/;
+ # return $1 + 0 if $_[0] =~ m/^SECT(\d*)$/;
+
+ # or
+ # $d_in_m[2] = 29 if ( &Date_LeapYear($y) );
+ # $d = $d_in_m[$m] if ( $d > $d_in_m[$m] );
+
+ if ($saw_if_or) {
+
+ # undo marginal flag if both lines are semicolon terminated
+ if ( $sc_term0 && $sc_term1 ) {
+ $is_marginal = 0;
}
+ }
+
+ # For a marginal match, only keep matches before the first 'bad' match
+ if ( $is_marginal
+ && defined($jfirst_bad)
+ && $imax_align > $jfirst_bad - 1 )
+ {
+ $imax_align = $jfirst_bad - 1;
+ }
+
+ ###########################################################
+ # Allow sweep to match lines with leading '=' in some cases
+ ###########################################################
+ if ( $imax_align < 0 && defined($j0_eq_pad) ) {
- ######################################################
- # Turn off the marginal flag if we saw an 'if' or 'or'
- ######################################################
+ if (
- # A trailing 'if' and 'or' often gives a good alignment
- # For example, we can align these:
- # return -1 if $_[0] =~ m/^CHAPT|APPENDIX/;
- # return $1 + 0 if $_[0] =~ m/^SECT(\d*)$/;
+ # If there is a following line with leading equals, or
+ # preceding line with leading equals, then let the sweep align
+ # them without restriction. For example, the first two lines
+ # here are a marginal match, but they are followed by a line
+ # with leading equals, so the sweep-lr logic can align all of
+ # the lines:
+
+ # $date[1] = $month_to_num{ $date[1] }; # <--line_0
+ # @xdate = split( /[:\/\s]/, $log->field('t') ); # <--line_1
+ # $day = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
+ # $time = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
+
+ # Likewise, if we reverse the two pairs we want the same result
+
+ # $day = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
+ # $time = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
+ # $date[1] = $month_to_num{ $date[1] }; # <--line_0
+ # @xdate = split( /[:\/\s]/, $log->field('t') ); # <--line_1
+
+ (
+ $imax_next >= 0
+ || $imax_prev >= 0
+ || TEST_MARGINAL_EQ_ALIGNMENT
+ )
+ && $j0_eq_pad >= -$j0_max_pad
+ && $j0_eq_pad <= $j0_max_pad
+ )
+ {
- # or
- # $d_in_m[2] = 29 if ( &Date_LeapYear($y) );
- # $d = $d_in_m[$m] if ( $d > $d_in_m[$m] );
+ # But do not do this if there is a comma before the '='.
+ # For example, the first two lines below have commas and
+ # therefore are not allowed to align with lines 3 & 4:
- if ($saw_if_or) {
+ # my ( $x, $y ) = $self->Size(); #<--line_0
+ # my ( $left, $top, $right, $bottom ) = $self->Window(); #<--l_1
+ # my $vx = $right - $left;
+ # my $vy = $bottom - $top;
- # undo marginal flag if both lines are semicolon terminated
- if ( $sc_term0 && $sc_term1 ) {
- $is_marginal = 0;
+ if ( $rpatterns_0->[0] !~ /,/ && $rpatterns_1->[0] !~ /,/ ) {
+ $imax_align = 0;
}
}
}
- ###############################
- # Set the return flag:
- # Don't align if still marginal
- ###############################
- my $do_not_align = $is_marginal;
+ RETURN:
+ return ( $is_marginal, $imax_align );
+ }
+}
- # But try to convert them into a simple comment group if the first line
- # a has side comment
- my $rfields = $group_lines[0]->get_rfields();
- my $maximum_field_index = $group_lines[0]->get_jmax();
- if ( $do_not_align
- && ( length( $rfields->[$maximum_field_index] ) > 0 ) )
- {
- combine_fields();
- $do_not_align = 0;
+sub get_extra_leading_spaces {
+
+ my ( $rlines, $rgroups ) = @_;
+
+ #----------------------------------------------------------
+ # Define any extra indentation space (for the -lp option).
+ # Here is why:
+ # If a list has side comments, sub scan_list must dump the
+ # list before it sees everything. When this happens, it sets
+ # the indentation to the standard scheme, but notes how
+ # many spaces it would have liked to use. We may be able
+ # to recover that space here in the event that all of the
+ # lines of a list are back together again.
+ #----------------------------------------------------------
+
+ return 0 unless ( @{$rlines} && @{$rgroups} );
+
+ my $object = $rlines->[0]->get_indentation();
+ return 0 unless ( ref($object) );
+ my $extra_leading_spaces = 0;
+ my $extra_indentation_spaces_wanted = get_recoverable_spaces($object);
+ return ($extra_leading_spaces) unless ($extra_indentation_spaces_wanted);
+
+ my $min_spaces = $extra_indentation_spaces_wanted;
+ if ( $min_spaces > 0 ) { $min_spaces = 0 }
+
+ # loop over all groups
+ my $ng = -1;
+ my $ngroups = @{$rgroups};
+ foreach my $item ( @{$rgroups} ) {
+ $ng++;
+ my ( $jbeg, $jend ) = @{$item};
+ foreach my $j ( $jbeg .. $jend ) {
+ next if ( $j == 0 );
+
+ # all indentation objects must be the same
+ if ( $object != $rlines->[$j]->get_indentation() ) {
+ return 0;
+ }
+ }
+
+ # find the maximum space without exceeding the line length for this group
+ my $avail = $rlines->[$jbeg]->get_available_space_on_right();
+ my $spaces =
+ ( $avail > $extra_indentation_spaces_wanted )
+ ? $extra_indentation_spaces_wanted
+ : $avail;
+
+ #########################################################
+ # Note: min spaces can be negative; for example with -gnu
+ # f(
+ # do { 1; !!(my $x = bless []); }
+ # );
+ #########################################################
+ # The following rule is needed to match older formatting:
+ # For multiple groups, we will keep spaces non-negative.
+ # For a single group, we will allow a negative space.
+ if ( $ngroups > 1 && $spaces < 0 ) { $spaces = 0 }
+
+ # update the minimum spacing
+ if ( $ng == 0 || $spaces < $extra_leading_spaces ) {
+ $extra_leading_spaces = $spaces;
}
- return $do_not_align;
}
+
+ # update the indentation object because with -icp the terminal
+ # ');' will use the same adjustment.
+ $object->permanently_decrease_available_spaces( -$extra_leading_spaces );
+ return $extra_leading_spaces;
}
-sub adjust_side_comment {
+sub forget_side_comment {
+ my ($self) = @_;
+ $self->[_last_side_comment_column_] = 0;
+ return;
+}
+
+sub is_good_side_comment_column {
+ my ( $self, $line, $line_number, $level, $num5 ) = @_;
+
+ # Upon encountering the first side comment of a group, decide if
+ # a previous side comment should be forgotten. This involves
+ # checking several rules.
+
+ # Return true to keep old comment location
+ # Return false to forget old comment location
+
+ my $rfields = $line->get_rfields();
+ my $is_hanging_side_comment = $line->get_is_hanging_side_comment();
+
+ # RULE1: Never forget comment before a hanging side comment
+ goto KEEP if ($is_hanging_side_comment);
+
+ # RULE2: Forget a side comment after a short line difference,
+ # where 'short line difference' is computed from a formula.
+ # Using a smooth formula helps minimize sudden large changes.
+ my $line_diff = $line_number - $self->[_last_side_comment_line_number_];
+ my $alev_diff = abs( $level - $self->[_last_side_comment_level_] );
+
+ # '$num5' is the number of comments in the first 5 lines after the first
+ # comment. It is needed to keep a compact group of side comments from
+ # being influenced by a more distant side comment.
+ $num5 = 1 unless ($num5);
+
+ # Some values:
+
+ # $adiff $num5 $short_diff
+ # 0 * 12
+ # 1 1 6
+ # 1 2 4
+ # 1 3 3
+ # 1 4 2
+ # 2 1 4
+ # 2 2 2
+ # 2 3 1
+ # 3 1 3
+ # 3 2 1
+
+ my $short_diff = SC_LONG_LINE_DIFF / ( 1 + $alev_diff * $num5 );
+
+ goto FORGET
+ if ( $line_diff > $short_diff
+ || !$self->[_rOpts_valign_side_comments_] );
+
+ # RULE3: Forget a side comment if this line is at lower level and
+ # ends a block
+ my $last_sc_level = $self->[_last_side_comment_level_];
+ goto FORGET
+ if ( $level < $last_sc_level
+ && $is_closing_block_type{ substr( $rfields->[0], 0, 1 ) } );
+
+ # RULE 4: Forget the last side comment if this comment might join a cached
+ # line ...
+ if ( my $cached_line_type = get_cached_line_type() ) {
+
+ # ... otherwise side comment alignment will get messed up.
+ # For example, in the following test script
+ # with using 'perltidy -sct -act=2', the last comment would try to
+ # align with the previous and then be in the wrong column when
+ # the lines are combined:
+
+ # foreach $line (
+ # [0, 1, 2], [3, 4, 5], [6, 7, 8], # rows
+ # [0, 3, 6], [1, 4, 7], [2, 5, 8], # columns
+ # [0, 4, 8], [2, 4, 6]
+ # ) # diagonals
+ goto FORGET
+ if ( $cached_line_type == 2 || $cached_line_type == 4 );
+ }
- my $do_not_align = shift;
+ # Otherwise, keep it alive
+ goto KEEP;
- # let's see if we can move the side comment field out a little
- # to improve readability (the last field is always a side comment field)
- my $have_side_comment = 0;
- my $first_side_comment_line = -1;
- my $maximum_field_index = $group_lines[0]->get_jmax();
- my $i = 0;
- foreach my $line (@group_lines) {
- if ( length( $line->get_rfields()->[$maximum_field_index] ) ) {
- $have_side_comment = 1;
- $first_side_comment_line = $i;
- last;
+ FORGET:
+ return 0;
+
+ KEEP:
+ return 1;
+}
+
+sub align_side_comments {
+
+ my ( $self, $rlines, $rgroups ) = @_;
+
+ # Align any side comments in this batch of lines
+
+ # Given:
+ # $rlines - the lines
+ # $rgroups - the partition of the lines into groups
+ #
+ # We will be working group-by-group because all side comments
+ # (real or fake) in each group are already aligned. So we just have
+ # to make alignments between groups wherever possible.
+
+ # An unusual aspect is that within each group we have aligned both real
+ # and fake side comments. This has the consequence that the lengths of
+ # long lines without real side comments can cause 'push' all side comments
+ # to the right. This seems unusual, but testing with and without this
+ # feature shows that it is usually better this way. Othewise, side
+ # comments can be hidden between long lines without side comments and
+ # thus be harder to read.
+
+ my $group_level = $self->[_group_level_];
+ my $continuing_sc_flow = $self->[_last_side_comment_length_] > 0
+ && $group_level == $self->[_last_level_written_];
+
+ # Find groups with side comments, and remember the first nonblank comment
+ my $j_sc_beg;
+ my @todo;
+ my $ng = -1;
+ foreach my $item ( @{$rgroups} ) {
+ $ng++;
+ my ( $jbeg, $jend ) = @{$item};
+ foreach my $j ( $jbeg .. $jend ) {
+ my $line = $rlines->[$j];
+ my $jmax = $line->get_jmax();
+ if ( $line->get_rfield_lengths()->[$jmax] ) {
+
+ # this group has a line with a side comment
+ push @todo, $ng;
+ if ( !defined($j_sc_beg) ) {
+ $j_sc_beg = $j;
+ }
+ last;
+ }
}
- $i++;
}
- my $kmax = $maximum_field_index + 1;
+ # done if no groups with side comments
+ return unless @todo;
+
+ # Count $num5 = number of comments in the 5 lines after the first comment
+ # This is an important factor in a decision formula
+ my $num5 = 1;
+ for ( my $jj = $j_sc_beg + 1 ; $jj < @{$rlines} ; $jj++ ) {
+ my $ldiff = $jj - $j_sc_beg;
+ last if ( $ldiff > 5 );
+ my $line = $rlines->[$jj];
+ my $jmax = $line->get_jmax();
+ my $sc_len = $line->get_rfield_lengths()->[$jmax];
+ next unless ($sc_len);
+ $num5++;
+ }
+
+ # Forget the old side comment location if necessary
+ my $line = $rlines->[$j_sc_beg];
+ my $lnum =
+ $j_sc_beg + $self->[_file_writer_object_]->get_output_line_number();
+ my $keep_it =
+ $self->is_good_side_comment_column( $line, $lnum, $group_level, $num5 );
+ my $last_side_comment_column =
+ $keep_it ? $self->[_last_side_comment_column_] : 0;
+
+ # If there are multiple groups we will do two passes
+ # so that we can find a common alignment for all groups.
+ my $MAX_PASS = @todo > 1 ? 2 : 1;
+
+ # Loop over passes
+ my $max_comment_column = $last_side_comment_column;
+ for ( my $PASS = 1 ; $PASS <= $MAX_PASS ; $PASS++ ) {
+
+ # If there are two passes, then on the last pass make the old column
+ # equal to the largest of the group. This will result in the comments
+ # being aligned if possible.
+ if ( $PASS == $MAX_PASS ) {
+ $last_side_comment_column = $max_comment_column;
+ }
- if ($have_side_comment) {
+ # Loop over the groups with side comments
+ my $column_limit;
+ foreach my $ng (@todo) {
+ my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
- my $line = $group_lines[0];
+ # Note that since all lines in a group have common alignments, we
+ # just have to work on one of the lines (the first line).
+ my $line = $rlines->[$jbeg];
+ my $jmax = $line->get_jmax();
+ my $is_hanging_side_comment = $line->get_is_hanging_side_comment();
+ last
+ if ( $PASS < $MAX_PASS && $is_hanging_side_comment );
- # the maximum space without exceeding the line length:
- my $avail = $line->get_available_space_on_right();
+ # the maximum space without exceeding the line length:
+ my $avail = $line->get_available_space_on_right();
- # try to use the previous comment column
- my $side_comment_column = $line->get_column( $kmax - 2 );
- my $move = $last_comment_column - $side_comment_column;
+ # try to use the previous comment column
+ my $side_comment_column = $line->get_column( $jmax - 1 );
+ my $move = $last_side_comment_column - $side_comment_column;
-## my $sc_line0 = $side_comment_history[0]->[0];
-## my $sc_col0 = $side_comment_history[0]->[1];
-## my $sc_line1 = $side_comment_history[1]->[0];
-## my $sc_col1 = $side_comment_history[1]->[1];
-## my $sc_line2 = $side_comment_history[2]->[0];
-## my $sc_col2 = $side_comment_history[2]->[1];
-##
-## # FUTURE UPDATES:
-## # Be sure to ignore 'do not align' and '} # end comments'
-## # Find first $move > 0 and $move <= $avail as follows:
-## # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12
-## # 2. try sc_col2 if (line-sc_line2) < 12
-## # 3. try min possible space, plus up to 8,
-## # 4. try min possible space
+ # Remember the maximum possible column of the first line with
+ # side comment
+ if ( !defined($column_limit) ) {
+ $column_limit = $side_comment_column + $avail;
+ }
- if ( $kmax > 0 && !$do_not_align ) {
+ next if ( $jmax <= 0 );
# but if this doesn't work, give up and use the minimum space
+ my $min_move = $self->[_rOpts_minimum_space_to_comment_] - 1;
if ( $move > $avail ) {
- $move = $rOpts_minimum_space_to_comment - 1;
+ $move = $min_move;
}
# but we want some minimum space to the comment
- my $min_move = $rOpts_minimum_space_to_comment - 1;
if ( $move >= 0
- && $last_side_comment_length > 0
- && ( $first_side_comment_line == 0 )
- && $group_level == $last_level_written )
+ && $j_sc_beg == 0
+ && $continuing_sc_flow )
{
$min_move = 0;
}
+ # remove constraints on hanging side comments
+ if ($is_hanging_side_comment) { $min_move = 0 }
+
if ( $move < $min_move ) {
$move = $min_move;
}
- # previously, an upper bound was placed on $move here,
- # (maximum_space_to_comment), but it was not helpful
-
# don't exceed the available space
if ( $move > $avail ) { $move = $avail }
- # we can only increase space, never decrease
- if ( $move > 0 ) {
- $line->increase_field_width( $maximum_field_index - 1, $move );
- }
+ # We can only increase space, never decrease.
+ if ( $move < 0 ) { $move = 0 }
- # remember this column for the next group
- $last_comment_column = $line->get_column( $kmax - 2 );
- }
- else {
+ # Discover the largest column on the preliminary pass
+ if ( $PASS < $MAX_PASS ) {
+ my $col = $line->get_column( $jmax - 1 ) + $move;
- # try to at least line up the existing side comment location
- if ( $kmax > 0 && $move > 0 && $move < $avail ) {
- $line->increase_field_width( $maximum_field_index - 1, $move );
- $do_not_align = 0;
+ # but ignore columns too large for the starting line
+ if ( $col > $max_comment_column && $col < $column_limit ) {
+ $max_comment_column = $col;
+ }
}
- # reset side comment column if we can't align
+ # Make the changes on the final pass
else {
- forget_side_comment();
+ $line->increase_field_width( $jmax - 1, $move );
+
+ # remember this column for the next group
+ $last_side_comment_column = $line->get_column( $jmax - 1 );
}
+ } ## end loop over groups
+ } ## end loop over passes
+
+ # Find the last side comment
+ my $j_sc_last;
+ my $ng_last = $todo[-1];
+ my ( $jbeg, $jend ) = @{ $rgroups->[$ng_last] };
+ for ( my $jj = $jend ; $jj >= $jbeg ; $jj-- ) {
+ my $line = $rlines->[$jj];
+ my $jmax = $line->get_jmax();
+ if ( $line->get_rfield_lengths()->[$jmax] ) {
+ $j_sc_last = $jj;
+ last;
}
}
- return $do_not_align;
+
+ # Save final side comment info for possible use by the next batch
+ if ( defined($j_sc_last) ) {
+ my $line_number =
+ $self->[_file_writer_object_]->get_output_line_number() + $j_sc_last;
+ $self->[_last_side_comment_column_] = $last_side_comment_column;
+ $self->[_last_side_comment_line_number_] = $line_number;
+ $self->[_last_side_comment_level_] = $group_level;
+ }
+ return;
}
+###############################
+# CODE SECTION 6: Output Step A
+###############################
+
sub valign_output_step_A {
###############################################################
# been found. Then it is shipped to the next step.
###############################################################
- my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
- $extra_leading_spaces )
- = @_;
+ my ( $self, $rinput_hash ) = @_;
+
+ my $line = $rinput_hash->{line};
+ my $min_ci_gap = $rinput_hash->{min_ci_gap};
+ my $do_not_align = $rinput_hash->{do_not_align};
+ my $group_leader_length = $rinput_hash->{group_leader_length};
+ my $extra_leading_spaces = $rinput_hash->{extra_leading_spaces};
+ my $level = $rinput_hash->{level};
+ my $maximum_line_length = $rinput_hash->{maximum_line_length};
+
my $rfields = $line->get_rfields();
+ my $rfield_lengths = $line->get_rfield_lengths();
my $leading_space_count = $line->get_leading_space_count();
my $outdent_long_lines = $line->get_outdent_long_lines();
my $maximum_field_index = $line->get_jmax();
my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
+ my $Kend = $line->get_Kend();
+ my $level_end = $line->get_level_end();
# add any extra spaces
if ( $leading_space_count > $group_leader_length ) {
$leading_space_count += $min_ci_gap;
}
- my $str = $rfields->[0];
+ my $str = $rfields->[0];
+ my $str_len = $rfield_lengths->[0];
# loop to concatenate all fields of this line and needed padding
my $total_pad_count = 0;
if (
( $j == $maximum_field_index )
&& ( !defined( $rfields->[$j] )
- || ( length( $rfields->[$j] ) == 0 ) )
+ || ( $rfield_lengths->[$j] == 0 ) )
);
# compute spaces of padding before this field
my $col = $line->get_column( $j - 1 );
- my $pad = $col - ( length($str) + $leading_space_count );
+ my $pad = $col - ( $str_len + $leading_space_count );
if ($do_not_align) {
$pad =
( $j < $maximum_field_index )
? 0
- : $rOpts_minimum_space_to_comment - 1;
+ : $self->[_rOpts_minimum_space_to_comment_] - 1;
}
# if the -fpsc flag is set, move the side comment to the selected
# column if and only if it is possible, ignoring constraints on
# line length and minimum space to comment
- if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index )
+ if ( $self->[_rOpts_fixed_position_side_comment_]
+ && $j == $maximum_field_index )
{
- my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
+ my $newpad =
+ $pad + $self->[_rOpts_fixed_position_side_comment_] - $col - 1;
if ( $newpad >= 0 ) { $pad = $newpad; }
}
# accumulate the padding
if ( $pad > 0 ) { $total_pad_count += $pad; }
- # add this field
- if ( !defined $rfields->[$j] ) {
- write_diagnostics("UNDEFined field at j=$j\n");
- }
-
# only add padding when we have a finite field;
# this avoids extra terminal spaces if we have empty fields
- if ( length( $rfields->[$j] ) > 0 ) {
+ if ( $rfield_lengths->[$j] > 0 ) {
$str .= ' ' x $total_pad_count;
+ $str_len += $total_pad_count;
$total_pad_count = 0;
$str .= $rfields->[$j];
+ $str_len += $rfield_lengths->[$j];
}
else {
$total_pad_count = 0;
}
-
- # update side comment history buffer
- if ( $j == $maximum_field_index ) {
- my $lineno = $file_writer_object->get_output_line_number();
- shift @side_comment_history;
- push @side_comment_history, [ $lineno, $col ];
- }
}
- my $side_comment_length = ( length( $rfields->[$maximum_field_index] ) );
+ my $side_comment_length = $rfield_lengths->[$maximum_field_index];
# ship this line off
- valign_output_step_B( $leading_space_count + $extra_leading_spaces,
- $str, $side_comment_length, $outdent_long_lines,
- $rvertical_tightness_flags, $group_level );
+ $self->valign_output_step_B(
+ {
+ leading_space_count => $leading_space_count + $extra_leading_spaces,
+ line => $str,
+ line_length => $str_len,
+ side_comment_length => $side_comment_length,
+ outdent_long_lines => $outdent_long_lines,
+ rvertical_tightness_flags => $rvertical_tightness_flags,
+ level => $level,
+ level_end => $level_end,
+ Kend => $Kend,
+ maximum_line_length => $maximum_line_length,
+ }
+ );
return;
}
-sub get_extra_leading_spaces {
+sub combine_fields {
- #----------------------------------------------------------
- # Define any extra indentation space (for the -lp option).
- # Here is why:
- # If a list has side comments, sub scan_list must dump the
- # list before it sees everything. When this happens, it sets
- # the indentation to the standard scheme, but notes how
- # many spaces it would have liked to use. We may be able
- # to recover that space here in the event that all of the
- # lines of a list are back together again.
- #----------------------------------------------------------
+ # We have a group of two lines for which we do not want to align tokens
+ # between index $imax_align and the side comment. So we will delete fields
+ # between $imax_align and the side comment. Alignments have already
+ # been set so we have to adjust them.
- my $extra_leading_spaces = 0;
- if ($extra_indent_ok) {
- my $object = $group_lines[0]->get_indentation();
- if ( ref($object) ) {
- my $extra_indentation_spaces_wanted =
- get_recoverable_spaces($object);
+ my ( $line_0, $line_1, $imax_align ) = @_;
- # all indentation objects must be the same
- for my $i ( 1 .. @group_lines - 1 ) {
- if ( $object != $group_lines[$i]->get_indentation() ) {
- $extra_indentation_spaces_wanted = 0;
- last;
- }
- }
+ if ( !defined($imax_align) ) { $imax_align = -1 }
- if ($extra_indentation_spaces_wanted) {
+ # First delete the unwanted tokens
+ my $jmax_old = $line_0->get_jmax();
+ my @old_alignments = $line_0->get_alignments();
+ my @idel = ( $imax_align + 1 .. $jmax_old - 2 );
- # the maximum space without exceeding the line length:
- my $avail = $group_lines[0]->get_available_space_on_right();
- $extra_leading_spaces =
- ( $avail > $extra_indentation_spaces_wanted )
- ? $extra_indentation_spaces_wanted
- : $avail;
+ return unless (@idel);
- # update the indentation object because with -icp the terminal
- # ');' will use the same adjustment.
- $object->permanently_decrease_available_spaces(
- -$extra_leading_spaces );
- }
- }
+ foreach my $line ( $line_0, $line_1 ) {
+ delete_selected_tokens( $line, \@idel );
}
- return $extra_leading_spaces;
-}
-
-sub combine_fields {
-
- # combine all fields except for the comment field ( sidecmt.t )
- # Uses global variables:
- # @group_lines
- my $maximum_field_index = $group_lines[0]->get_jmax();
- foreach my $line (@group_lines) {
- my $rfields = $line->get_rfields();
- foreach ( 1 .. $maximum_field_index - 1 ) {
- $rfields->[0] .= $rfields->[$_];
- }
- $rfields->[1] = $rfields->[$maximum_field_index];
-
- $line->set_jmax(1);
- $line->set_column( 0, 0 );
- $line->set_column( 1, 0 );
+ # Now adjust the alignments. Note that the side comment alignment
+ # is always at jmax-1, and there is an ending alignment at jmax.
+ my @new_alignments;
+ if ( $imax_align >= 0 ) {
+ @new_alignments[ 0 .. $imax_align ] =
+ @old_alignments[ 0 .. $imax_align ];
}
- $maximum_field_index = 1;
- foreach my $line (@group_lines) {
- my $rfields = $line->get_rfields();
- for my $k ( 0 .. $maximum_field_index ) {
- my $pad = length( $rfields->[$k] ) - $line->current_field_width($k);
- if ( $k == 0 ) {
- $pad += $line->get_leading_space_count();
- }
-
- if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) }
+ my $jmax_new = $line_0->get_jmax();
- }
- }
+ $new_alignments[ $jmax_new - 1 ] = $old_alignments[ $jmax_old - 1 ];
+ $new_alignments[$jmax_new] = $old_alignments[$jmax_old];
+ $line_0->set_alignments(@new_alignments);
+ $line_1->set_alignments(@new_alignments);
return;
}
sub get_output_line_number {
- # the output line number reported to a caller is the number of items
- # written plus the number of items in the buffer
- my $self = shift;
- my $nlines = @group_lines;
- return $nlines + $file_writer_object->get_output_line_number();
+ # The output line number reported to a caller =
+ # the number of items still in the buffer +
+ # the number of items written.
+ return $_[0]->group_line_count() +
+ $_[0]->[_file_writer_object_]->get_output_line_number();
}
-sub valign_output_step_B {
-
- ###############################################################
- # This is Step B in writing vertically aligned lines.
- # Vertical tightness is applied according to preset flags.
- # In particular this routine handles stacking of opening
- # and closing tokens.
- ###############################################################
+###############################
+# CODE SECTION 7: Output Step B
+###############################
+
+{ ## closure for sub valign_output_step_B
+
+ # These are values for a cache used by valign_output_step_B.
+ my $cached_line_text;
+ my $cached_line_text_length;
+ my $cached_line_type;
+ my $cached_line_opening_flag;
+ my $cached_line_closing_flag;
+ my $cached_seqno;
+ my $cached_line_valid;
+ my $cached_line_leading_space_count;
+ my $cached_seqno_string;
+ my $cached_line_Kend;
+ my $cached_line_maximum_length;
+ my $seqno_string;
+ my $last_nonblank_seqno_string;
+
+ sub get_seqno_string {
+ return $seqno_string;
+ }
- my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
- $rvertical_tightness_flags, $level )
- = @_;
+ sub get_last_nonblank_seqno_string {
+ return $last_nonblank_seqno_string;
+ }
- # handle outdenting of long lines:
- if ($outdent_long_lines) {
- my $excess =
- length($str) -
- $side_comment_length +
- $leading_space_count -
- maximum_line_length_for_level($level);
- if ( $excess > 0 ) {
- $leading_space_count = 0;
- $last_outdented_line_at =
- $file_writer_object->get_output_line_number();
+ sub set_last_nonblank_seqno_string {
+ my ($val) = @_;
+ $last_nonblank_seqno_string = $val;
+ return;
+ }
- unless ($outdented_line_count) {
- $first_outdented_line_at = $last_outdented_line_at;
- }
- $outdented_line_count++;
- }
+ sub get_cached_line_opening_flag {
+ return $cached_line_opening_flag;
}
- # Make preliminary leading whitespace. It could get changed
- # later by entabbing, so we have to keep track of any changes
- # to the leading_space_count from here on.
- my $leading_string =
- $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
+ sub get_cached_line_type {
+ return $cached_line_type;
+ }
- # Unpack any recombination data; it was packed by
- # sub send_lines_to_vertical_aligner. Contents:
- #
- # [0] type: 1=opening non-block 2=closing non-block
- # 3=opening block brace 4=closing block brace
- # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
- # if closing: spaces of padding to use
- # [2] sequence number of container
- # [3] valid flag: do not append if this flag is false
- #
- my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
- $seqno_end );
- if ($rvertical_tightness_flags) {
- (
- $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
- $seqno_end
- ) = @{$rvertical_tightness_flags};
+ sub set_cached_line_valid {
+ my ($val) = @_;
+ $cached_line_valid = $val;
+ return;
}
- $seqno_string = $seqno_end;
+ sub get_cached_seqno {
+ return $cached_seqno;
+ }
- # handle any cached line ..
- # either append this line to it or write it out
- if ( length($cached_line_text) ) {
+ sub initialize_step_B_cache {
+
+ # valign_output_step_B cache:
+ $cached_line_text = "";
+ $cached_line_text_length = 0;
+ $cached_line_type = 0;
+ $cached_line_opening_flag = 0;
+ $cached_line_closing_flag = 0;
+ $cached_seqno = 0;
+ $cached_line_valid = 0;
+ $cached_line_leading_space_count = 0;
+ $cached_seqno_string = "";
+ $cached_line_Kend = undef;
+ $cached_line_maximum_length = undef;
+
+ # These vars hold a string of sequence numbers joined together used by
+ # the cache
+ $seqno_string = "";
+ $last_nonblank_seqno_string = "";
+ return;
+ }
- # Dump an invalid cached line
- if ( !$cached_line_valid ) {
- valign_output_step_C( $cached_line_text,
+ sub _flush_cache {
+ my ($self) = @_;
+ if ($cached_line_type) {
+ $seqno_string = $cached_seqno_string;
+ $self->valign_output_step_C(
+ $cached_line_text,
$cached_line_leading_space_count,
- $last_level_written );
+ $self->[_last_level_written_],
+ $cached_line_Kend,
+ );
+ $cached_line_type = 0;
+ $cached_line_text = "";
+ $cached_line_text_length = 0;
+ $cached_seqno_string = "";
+ $cached_line_Kend = undef;
+ $cached_line_maximum_length = undef;
}
+ return;
+ }
- # Handle cached line ending in OPENING tokens
- elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
-
- my $gap = $leading_space_count - length($cached_line_text);
-
- # handle option of just one tight opening per line:
- if ( $cached_line_flag == 1 ) {
- if ( defined($open_or_close) && $open_or_close == 1 ) {
- $gap = -1;
+ sub valign_output_step_B {
+
+ ###############################################################
+ # This is Step B in writing vertically aligned lines.
+ # Vertical tightness is applied according to preset flags.
+ # In particular this routine handles stacking of opening
+ # and closing tokens.
+ ###############################################################
+
+ my ( $self, $rinput ) = @_;
+
+ my $leading_space_count = $rinput->{leading_space_count};
+ my $str = $rinput->{line};
+ my $str_length = $rinput->{line_length};
+ my $side_comment_length = $rinput->{side_comment_length};
+ my $outdent_long_lines = $rinput->{outdent_long_lines};
+ my $rvertical_tightness_flags = $rinput->{rvertical_tightness_flags};
+ my $level = $rinput->{level};
+ my $level_end = $rinput->{level_end};
+ my $Kend = $rinput->{Kend};
+ my $maximum_line_length = $rinput->{maximum_line_length};
+
+ my $last_level_written = $self->[_last_level_written_];
+
+ # Useful -gcs test cases for wide characters are
+ # perl527/(method.t.2, reg_mesg.t, mime-header.t)
+
+ # handle outdenting of long lines:
+ my $is_outdented_line;
+ if ($outdent_long_lines) {
+ my $excess =
+ $str_length -
+ $side_comment_length +
+ $leading_space_count -
+ $maximum_line_length;
+ if ( $excess > 0 ) {
+ $leading_space_count = 0;
+ my $file_writer_object = $self->[_file_writer_object_];
+ my $last_outdented_line_at =
+ $file_writer_object->get_output_line_number();
+ $self->[_last_outdented_line_at_] = $last_outdented_line_at;
+
+ my $outdented_line_count = $self->[_outdented_line_count_];
+ unless ($outdented_line_count) {
+ $self->[_first_outdented_line_at_] =
+ $last_outdented_line_at;
}
+ $outdented_line_count++;
+ $self->[_outdented_line_count_] = $outdented_line_count;
+ $is_outdented_line = 1;
}
+ }
+
+ # Make preliminary leading whitespace. It could get changed
+ # later by entabbing, so we have to keep track of any changes
+ # to the leading_space_count from here on.
+ my $leading_string =
+ $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
+ my $leading_string_length = length($leading_string);
+
+ # Unpack any recombination data; it was packed by
+ # sub 'Formatter::set_vertical_tightness_flags'
+
+ # old hash Meaning
+ # index key
+ #
+ # 0 _vt_type: 1=opening non-block 2=closing non-block
+ # 3=opening block brace 4=closing block brace
+ #
+ # 1a _vt_opening_flag: 1=no multiple steps, 2=multiple steps ok
+ # 1b _vt_closing_flag: spaces of padding to use if closing
+ # 2 _vt_seqno: sequence number of container
+ # 3 _vt_valid flag: do not append if this flag is false. Will be
+ # true if appropriate -vt flag is set. Otherwise, Will be
+ # made true only for 2 line container in parens with -lp
+ # 4 _vt_seqno_beg: sequence number of first token of line
+ # 5 _vt_seqno_end: sequence number of last token of line
+ # 6 _vt_min_lines: min number of lines for joining opening cache,
+ # 0=no constraint
+ # 7 _vt_max_lines: max number of lines for joining opening cache,
+ # 0=no constraint
+
+ my ( $open_or_close, $opening_flag, $closing_flag, $seqno, $valid,
+ $seqno_beg, $seqno_end );
+ if ($rvertical_tightness_flags) {
+
+ $open_or_close = $rvertical_tightness_flags->{_vt_type};
+ $opening_flag = $rvertical_tightness_flags->{_vt_opening_flag};
+ $closing_flag = $rvertical_tightness_flags->{_vt_closing_flag};
+ $seqno = $rvertical_tightness_flags->{_vt_seqno};
+ $valid = $rvertical_tightness_flags->{_vt_valid_flag};
+ $seqno_beg = $rvertical_tightness_flags->{_vt_seqno_beg};
+ $seqno_end = $rvertical_tightness_flags->{_vt_seqno_end};
+ }
- if ( $gap >= 0 && defined($seqno_beg) ) {
- $leading_string = $cached_line_text . ' ' x $gap;
- $leading_space_count = $cached_line_leading_space_count;
- $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
- $level = $last_level_written;
+ $seqno_string = $seqno_end;
+
+ # handle any cached line ..
+ # either append this line to it or write it out
+ # Note: the function length() is used in this next test out of caution.
+ # All testing has shown that the variable $cached_line_text_length is
+ # correct, but its calculation is complex and a loss of cached text
+ # would be a disaster.
+ if ( length($cached_line_text) ) {
+
+ # Dump an invalid cached line
+ if ( !$cached_line_valid ) {
+ $self->valign_output_step_C(
+ $cached_line_text, $cached_line_leading_space_count,
+ $last_level_written, $cached_line_Kend
+ );
}
- else {
- valign_output_step_C( $cached_line_text,
- $cached_line_leading_space_count,
- $last_level_written );
+
+ # Handle cached line ending in OPENING tokens
+ elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
+
+ my $gap = $leading_space_count - $cached_line_text_length;
+
+ # handle option of just one tight opening per line:
+ if ( $cached_line_opening_flag == 1 ) {
+ if ( defined($open_or_close) && $open_or_close == 1 ) {
+ $gap = -1;
+ }
+ }
+
+ # Do not join the lines if this might produce a one-line
+ # container which exceeds the maximum line length. This is
+ # necessary prevent blinking, particularly with the combination
+ # -xci -pvt=2. In that case a one-line block alternately forms
+ # and breaks, causing -xci to alternately turn on and off (case
+ # b765).
+ # Patched to fix cases b656 b862 b971 b972: always do the check
+ # if the maximum line length changes (due to -vmll).
+ if (
+ $gap >= 0
+ && ( $maximum_line_length != $cached_line_maximum_length
+ || ( defined($level_end) && $level > $level_end ) )
+ )
+ {
+ my $test_line_length =
+ $cached_line_text_length + $gap + $str_length;
+
+ # Add a small tolerance in the length test (fixes case b862)
+ if ( $test_line_length > $cached_line_maximum_length - 2 ) {
+ $gap = -1;
+ }
+ }
+
+ if ( $gap >= 0 && defined($seqno_beg) ) {
+ $maximum_line_length = $cached_line_maximum_length;
+ $leading_string = $cached_line_text . ' ' x $gap;
+ $leading_string_length = $cached_line_text_length + $gap;
+ $leading_space_count = $cached_line_leading_space_count;
+ $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
+ $level = $last_level_written;
+ }
+ else {
+ $self->valign_output_step_C(
+ $cached_line_text, $cached_line_leading_space_count,
+ $last_level_written, $cached_line_Kend
+ );
+ }
}
- }
- # Handle cached line ending in CLOSING tokens
- else {
- my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
- if (
+ # Handle cached line ending in CLOSING tokens
+ else {
+ my $test_line =
+ $cached_line_text . ' ' x $cached_line_closing_flag . $str;
+ my $test_line_length =
+ $cached_line_text_length +
+ $cached_line_closing_flag +
+ $str_length;
+ if (
- # The new line must start with container
- $seqno_beg
+ # The new line must start with container
+ $seqno_beg
- # The container combination must be okay..
- && (
+ # The container combination must be okay..
+ && (
- # okay to combine like types
- ( $open_or_close == $cached_line_type )
+ # okay to combine like types
+ ( $open_or_close == $cached_line_type )
- # closing block brace may append to non-block
- || ( $cached_line_type == 2 && $open_or_close == 4 )
+ # closing block brace may append to non-block
+ || ( $cached_line_type == 2 && $open_or_close == 4 )
- # something like ');'
- || ( !$open_or_close && $cached_line_type == 2 )
+ # something like ');'
+ || ( !$open_or_close && $cached_line_type == 2 )
- )
+ )
- # The combined line must fit
- && (
- length($test_line) <=
- maximum_line_length_for_level($last_level_written) )
- )
- {
+ # The combined line must fit
+ && ( $test_line_length <= $cached_line_maximum_length )
+ )
+ {
- $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
-
- # Patch to outdent closing tokens ending # in ');'
- # If we are joining a line like ');' to a previous stacked
- # set of closing tokens, then decide if we may outdent the
- # combined stack to the indentation of the ');'. Since we
- # should not normally outdent any of the other tokens more than
- # the indentation of the lines that contained them, we will
- # only do this if all of the corresponding opening
- # tokens were on the same line. This can happen with
- # -sot and -sct. For example, it is ok here:
- # __PACKAGE__->load_components( qw(
- # PK::Auto
- # Core
- # ));
- #
- # But, for example, we do not outdent in this example because
- # that would put the closing sub brace out farther than the
- # opening sub brace:
- #
- # perltidy -sot -sct
- # $c->Tk::bind(
- # '<Control-f>' => sub {
- # my ($c) = @_;
- # my $e = $c->XEvent;
- # itemsUnderArea $c;
- # } );
- #
- if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) {
-
- # The way to tell this is if the stacked sequence numbers
- # of this output line are the reverse of the stacked
- # sequence numbers of the previous non-blank line of
- # sequence numbers. So we can join if the previous
- # nonblank string of tokens is the mirror image. For
- # example if stack )}] is 13:8:6 then we are looking for a
- # leading stack like [{( which is 6:8:13 We only need to
- # check the two ends, because the intermediate tokens must
- # fall in order. Note on speed: having to split on colons
- # and eliminate multiple colons might appear to be slow,
- # but it's not an issue because we almost never come
- # through here. In a typical file we don't.
- $seqno_string =~ s/^:+//;
- $last_nonblank_seqno_string =~ s/^:+//;
- $seqno_string =~ s/:+/:/g;
- $last_nonblank_seqno_string =~ s/:+/:/g;
-
- # how many spaces can we outdent?
- my $diff =
- $cached_line_leading_space_count - $leading_space_count;
- if ( $diff > 0
- && length($seqno_string)
- && length($last_nonblank_seqno_string) ==
- length($seqno_string) )
+ $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
+
+ # Patch to outdent closing tokens ending # in ');' If we
+ # are joining a line like ');' to a previous stacked set of
+ # closing tokens, then decide if we may outdent the
+ # combined stack to the indentation of the ');'. Since we
+ # should not normally outdent any of the other tokens more
+ # than the indentation of the lines that contained them, we
+ # will only do this if all of the corresponding opening
+ # tokens were on the same line. This can happen with -sot
+ # and -sct.
+
+ # For example, it is ok here:
+ # __PACKAGE__->load_components( qw(
+ # PK::Auto
+ # Core
+ # ));
+ #
+ # But, for example, we do not outdent in this example
+ # because that would put the closing sub brace out farther
+ # than the opening sub brace:
+ #
+ # perltidy -sot -sct
+ # $c->Tk::bind(
+ # '<Control-f>' => sub {
+ # my ($c) = @_;
+ # my $e = $c->XEvent;
+ # itemsUnderArea $c;
+ # } );
+ #
+ if ( $str =~ /^\);/
+ && $cached_line_text =~ /^[\)\}\]\s]*$/ )
{
- my @seqno_last =
- ( split /:/, $last_nonblank_seqno_string );
- my @seqno_now = ( split /:/, $seqno_string );
- if ( @seqno_now
- && @seqno_last
- && $seqno_now[-1] == $seqno_last[0]
- && $seqno_now[0] == $seqno_last[-1] )
- {
- # OK to outdent ..
- # for absolute safety, be sure we only remove
- # whitespace
- my $ws = substr( $test_line, 0, $diff );
- if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
-
- $test_line = substr( $test_line, $diff );
- $cached_line_leading_space_count -= $diff;
- $last_level_written =
- level_change(
- $cached_line_leading_space_count,
- $diff, $last_level_written );
- reduce_valign_buffer_indentation($diff);
+ # The way to tell this is if the stacked sequence
+ # numbers of this output line are the reverse of the
+ # stacked sequence numbers of the previous non-blank
+ # line of sequence numbers. So we can join if the
+ # previous nonblank string of tokens is the mirror
+ # image. For example if stack )}] is 13:8:6 then we
+ # are looking for a leading stack like [{( which
+ # is 6:8:13. We only need to check the two ends,
+ # because the intermediate tokens must fall in order.
+ # Note on speed: having to split on colons and
+ # eliminate multiple colons might appear to be slow,
+ # but it's not an issue because we almost never come
+ # through here. In a typical file we don't.
+
+ $seqno_string =~ s/^:+//;
+ $last_nonblank_seqno_string =~ s/^:+//;
+ $seqno_string =~ s/:+/:/g;
+ $last_nonblank_seqno_string =~ s/:+/:/g;
+
+ # how many spaces can we outdent?
+ my $diff =
+ $cached_line_leading_space_count -
+ $leading_space_count;
+ if ( $diff > 0
+ && length($seqno_string)
+ && length($last_nonblank_seqno_string) ==
+ length($seqno_string) )
+ {
+ my @seqno_last =
+ ( split /:/, $last_nonblank_seqno_string );
+ my @seqno_now = ( split /:/, $seqno_string );
+ if ( @seqno_now
+ && @seqno_last
+ && $seqno_now[-1] == $seqno_last[0]
+ && $seqno_now[0] == $seqno_last[-1] )
+ {
+
+ # OK to outdent ..
+ # for absolute safety, be sure we only remove
+ # whitespace
+ my $ws = substr( $test_line, 0, $diff );
+ if ( ( length($ws) == $diff )
+ && $ws =~ /^\s+$/ )
+ {
+
+ $test_line = substr( $test_line, $diff );
+ $cached_line_leading_space_count -= $diff;
+ $last_level_written =
+ $self->level_change(
+ $cached_line_leading_space_count,
+ $diff, $last_level_written );
+ $self->reduce_valign_buffer_indentation(
+ $diff);
+ }
+
+ # shouldn't happen, but not critical:
+ ##else {
+ ## ERROR transferring indentation here
+ ##}
}
-
- # shouldn't happen, but not critical:
- ##else {
- ## ERROR transferring indentation here
- ##}
}
}
- }
- $str = $test_line;
- $leading_string = "";
- $leading_space_count = $cached_line_leading_space_count;
- $level = $last_level_written;
+ # Change the args to look like we received the combined line
+ $str = $test_line;
+ $str_length = $test_line_length;
+ $leading_string = "";
+ $leading_string_length = 0;
+ $leading_space_count = $cached_line_leading_space_count;
+ $level = $last_level_written;
+ $maximum_line_length = $cached_line_maximum_length;
+ }
+ else {
+ $self->valign_output_step_C(
+ $cached_line_text, $cached_line_leading_space_count,
+ $last_level_written, $cached_line_Kend
+ );
+ }
}
- else {
- valign_output_step_C( $cached_line_text,
- $cached_line_leading_space_count,
- $last_level_written );
+ }
+ $cached_line_type = 0;
+ $cached_line_text = "";
+ $cached_line_text_length = 0;
+ $cached_line_Kend = undef;
+ $cached_line_maximum_length = undef;
+
+ # make the line to be written
+ my $line = $leading_string . $str;
+ my $line_length = $leading_string_length + $str_length;
+
+ # Safety check: be sure that a line to be cached as a stacked block
+ # brace line ends in the appropriate opening or closing block brace.
+ # This should always be the case if the caller set flags correctly.
+ # Code '3' is for -sobb, code '4' is for -scbb.
+ if ($open_or_close) {
+ if ( $open_or_close == 3 && $line !~ /\{\s*$/
+ || $open_or_close == 4 && $line !~ /\}\s*$/ )
+ {
+ $open_or_close = 0;
}
}
- }
- $cached_line_type = 0;
- $cached_line_text = "";
- # make the line to be written
- my $line = $leading_string . $str;
+ # write or cache this line ...
+ # fix for case b999: do not cache an outdented line
+ if ( !$open_or_close || $side_comment_length > 0 || $is_outdented_line )
+ {
+ $self->valign_output_step_C( $line, $leading_space_count, $level,
+ $Kend );
+ }
+ else {
+ $cached_line_text = $line;
+ $cached_line_text_length = $line_length;
+ $cached_line_type = $open_or_close;
+ $cached_line_opening_flag = $opening_flag;
+ $cached_line_closing_flag = $closing_flag;
+ $cached_seqno = $seqno;
+ $cached_line_valid = $valid;
+ $cached_line_leading_space_count = $leading_space_count;
+ $cached_seqno_string = $seqno_string;
+ $cached_line_Kend = $Kend;
+ $cached_line_maximum_length = $maximum_line_length;
+ }
- # write or cache this line
- if ( !$open_or_close || $side_comment_length > 0 ) {
- valign_output_step_C( $line, $leading_space_count, $level );
+ $self->[_last_level_written_] = $level;
+ $self->[_last_side_comment_length_] = $side_comment_length;
+ return;
}
- else {
- $cached_line_text = $line;
- $cached_line_type = $open_or_close;
- $cached_line_flag = $tightness_flag;
- $cached_seqno = $seqno;
- $cached_line_valid = $valid;
- $cached_line_leading_space_count = $leading_space_count;
- $cached_seqno_string = $seqno_string;
- }
-
- $last_level_written = $level;
- $last_side_comment_length = $side_comment_length;
- $extra_indent_ok = 0;
- return;
}
-sub valign_output_step_C {
+###############################
+# CODE SECTION 8: Output Step C
+###############################
- ###############################################################
- # This is Step C in writing vertically aligned lines.
- # Lines are either stored in a buffer or passed along to the next step.
- # The reason for storing lines is that we may later want to reduce their
- # indentation when -sot and -sct are both used.
- ###############################################################
- my @args = @_;
+{ ## closure for sub valign_output_step_C
- # Dump any saved lines if we see a line with an unbalanced opening or
- # closing token.
- dump_valign_buffer() if ( $seqno_string && $valign_buffer_filling );
+ # Vertical alignment buffer used by valign_output_step_C
+ my $valign_buffer_filling;
+ my @valign_buffer;
- # Either store or write this line
- if ($valign_buffer_filling) {
- push @valign_buffer, [@args];
+ sub initialize_valign_buffer {
+ @valign_buffer = ();
+ $valign_buffer_filling = "";
+ return;
}
- else {
- valign_output_step_D(@args);
+
+ sub dump_valign_buffer {
+ my ($self) = @_;
+ if (@valign_buffer) {
+ foreach (@valign_buffer) {
+ $self->valign_output_step_D( @{$_} );
+ }
+ @valign_buffer = ();
+ }
+ $valign_buffer_filling = "";
+ return;
+ }
+
+ sub reduce_valign_buffer_indentation {
+
+ my ( $self, $diff ) = @_;
+ if ( $valign_buffer_filling && $diff ) {
+ my $max_valign_buffer = @valign_buffer;
+ foreach my $i ( 0 .. $max_valign_buffer - 1 ) {
+ my ( $line, $leading_space_count, $level, $Kend ) =
+ @{ $valign_buffer[$i] };
+ my $ws = substr( $line, 0, $diff );
+ if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
+ $line = substr( $line, $diff );
+ }
+ if ( $leading_space_count >= $diff ) {
+ $leading_space_count -= $diff;
+ $level =
+ $self->level_change( $leading_space_count, $diff,
+ $level );
+ }
+ $valign_buffer[$i] =
+ [ $line, $leading_space_count, $level, $Kend ];
+ }
+ }
+ return;
}
- # For lines starting or ending with opening or closing tokens..
- if ($seqno_string) {
- $last_nonblank_seqno_string = $seqno_string;
+ sub valign_output_step_C {
- # Start storing lines when we see a line with multiple stacked opening
- # tokens.
- # patch for RT #94354, requested by Colin Williams
- if ( $seqno_string =~ /^\d+(\:+\d+)+$/ && $args[0] !~ /^[\}\)\]\:\?]/ )
- {
+ ###############################################################
+ # This is Step C in writing vertically aligned lines.
+ # Lines are either stored in a buffer or passed along to the next step.
+ # The reason for storing lines is that we may later want to reduce their
+ # indentation when -sot and -sct are both used.
+ ###############################################################
+ my ( $self, @args ) = @_;
+
+ my $seqno_string = get_seqno_string();
+ my $last_nonblank_seqno_string = get_last_nonblank_seqno_string();
+
+ # Dump any saved lines if we see a line with an unbalanced opening or
+ # closing token.
+ $self->dump_valign_buffer()
+ if ( $seqno_string && $valign_buffer_filling );
+
+ # Either store or write this line
+ if ($valign_buffer_filling) {
+ push @valign_buffer, [@args];
+ }
+ else {
+ $self->valign_output_step_D(@args);
+ }
+
+ # For lines starting or ending with opening or closing tokens..
+ if ($seqno_string) {
+ $last_nonblank_seqno_string = $seqno_string;
+ set_last_nonblank_seqno_string($seqno_string);
+
+ # Start storing lines when we see a line with multiple stacked
+ # opening tokens.
+ # patch for RT #94354, requested by Colin Williams
+ if ( $seqno_string =~ /^\d+(\:+\d+)+$/
+ && $args[0] !~ /^[\}\)\]\:\?]/ )
+ {
- # This test is efficient but a little subtle: The first test says
- # that we have multiple sequence numbers and hence multiple opening
- # or closing tokens in this line. The second part of the test
- # rejects stacked closing and ternary tokens. So if we get here
- # then we should have stacked unbalanced opening tokens.
+ # This test is efficient but a little subtle: The first test
+ # says that we have multiple sequence numbers and hence
+ # multiple opening or closing tokens in this line. The second
+ # part of the test rejects stacked closing and ternary tokens.
+ # So if we get here then we should have stacked unbalanced
+ # opening tokens.
- # Here is a complex example:
+ # Here is a complex example:
- # Foo($Bar[0], { # (side comment)
- # baz => 1,
- # });
+ # Foo($Bar[0], { # (side comment)
+ # baz => 1,
+ # });
- # The first line has sequence 6::4. It does not begin with
- # a closing token or ternary, so it passes the test and must be
- # stacked opening tokens.
+ # The first line has sequence 6::4. It does not begin with
+ # a closing token or ternary, so it passes the test and must be
+ # stacked opening tokens.
- # The last line has sequence 4:6 but is a stack of closing tokens,
- # so it gets rejected.
+ # The last line has sequence 4:6 but is a stack of closing
+ # tokens, so it gets rejected.
- # Note that the sequence number of an opening token for a qw quote
- # is a negative number and will be rejected.
- # For example, for the following line:
- # skip_symbols([qw(
- # $seqno_string='10:5:-1'. It would be okay to accept it but
- # I decided not to do this after testing.
+ # Note that the sequence number of an opening token for a qw
+ # quote is a negative number and will be rejected. For
+ # example, for the following line: skip_symbols([qw(
+ # $seqno_string='10:5:-1'. It would be okay to accept it but I
+ # decided not to do this after testing.
- $valign_buffer_filling = $seqno_string;
+ $valign_buffer_filling = $seqno_string;
+ }
}
+ return;
}
- return;
}
+###############################
+# CODE SECTION 9: Output Step D
+###############################
+
sub valign_output_step_D {
###############################################################
# This is Step D in writing vertically aligned lines.
+ # It is the end of the vertical alignment pipeline.
# Write one vertically aligned line of code to the output object.
###############################################################
- my ( $line, $leading_space_count, $level ) = @_;
+ my ( $self, $line, $leading_space_count, $level, $Kend ) = @_;
# The line is currently correct if there is no tabbing (recommended!)
# We may have to lop off some leading spaces and replace with tabs.
if ( $leading_space_count > 0 ) {
+ my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
+ my $rOpts_tabs = $self->[_rOpts_tabs_];
+ my $rOpts_entab_leading_whitespace =
+ $self->[_rOpts_entab_leading_whitespace_];
+
# Nothing to do if no tabs
if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
|| $rOpts_indent_columns <= 0 )
# Handle entab option
elsif ($rOpts_entab_leading_whitespace) {
- # Patch 12-nov-2018 based on report from Glenn. Extra padding was
- # not correctly entabbed, nor were side comments:
- # Increase leading space count for a padded line to get correct tabbing
+ # Patch 12-nov-2018 based on report from Glenn. Extra padding was
+ # not correctly entabbed, nor were side comments: Increase leading
+ # space count for a padded line to get correct tabbing
if ( $line =~ /^(\s+)(.*)$/ ) {
my $spaces = length($1);
if ( $spaces > $leading_space_count ) {
# shouldn't happen - program error counting whitespace
# - skip entabbing
- VALIGN_DEBUG_FLAG_TABS
+ DEBUG_TABS
&& warning(
"Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
);
# But it could be an outdented comment
if ( $line !~ /^\s*#/ ) {
- VALIGN_DEBUG_FLAG_TABS
+ DEBUG_TABS
&& warning(
-"Error entabbing in valign_output_step_D: for level=$group_level count=$leading_space_count\n"
+"Error entabbing in valign_output_step_D: for level=$level count=$leading_space_count\n"
);
}
$leading_string = ( ' ' x $leading_space_count );
# shouldn't happen - program error counting whitespace
# we'll skip entabbing
- VALIGN_DEBUG_FLAG_TABS
+ DEBUG_TABS
&& warning(
"Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
);
}
}
}
- $file_writer_object->write_code_line( $line . "\n" );
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->write_code_line( $line . "\n", $Kend );
+
return;
}
-{ # begin get_leading_string
+{ ## closure for sub get_leading_string
my @leading_string_cache;
+ sub initialize_leading_string_cache {
+ @leading_string_cache = ();
+ return;
+ }
+
sub get_leading_string {
# define the leading whitespace string for this line..
- my $leading_whitespace_count = shift;
+ my ( $self, $leading_whitespace_count, $group_level ) = @_;
# Handle case of zero whitespace, which includes multi-line quotes
# (which may have a finite level; this prevents tab problems)
my $leading_string;
# Handle simple case of no tabs
+ my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
+ my $rOpts_tabs = $self->[_rOpts_tabs_];
+ my $rOpts_entab_leading_whitespace =
+ $self->[_rOpts_entab_leading_whitespace_];
+
if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
|| $rOpts_indent_columns <= 0 )
{
# shouldn't happen:
if ( $space_count < 0 ) {
- VALIGN_DEBUG_FLAG_TABS
+ DEBUG_TABS
&& warning(
"Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n"
);
$leading_string_cache[$leading_whitespace_count] = $leading_string;
return $leading_string;
}
-} # end get_leading_string
+} ## end get_leading_string
+
+##########################
+# CODE SECTION 10: Summary
+##########################
sub report_anything_unusual {
my $self = shift;
+
+ my $outdented_line_count = $self->[_outdented_line_count_];
if ( $outdented_line_count > 0 ) {
write_logfile_entry(
"$outdented_line_count long lines were outdented:\n");
+ my $first_outdented_line_at = $self->[_first_outdented_line_at_];
write_logfile_entry(
" First at output line $first_outdented_line_at\n");
if ( $outdented_line_count > 1 ) {
+ my $last_outdented_line_at = $self->[_last_outdented_line_at_];
write_logfile_entry(
" Last at output line $last_outdented_line_at\n");
}