use strict;
use warnings;
use Carp;
-our $VERSION = '20220217';
+use English qw( -no_match_vars );
+our $VERSION = '20221112';
use Perl::Tidy::VerticalAligner::Alignment;
use Perl::Tidy::VerticalAligner::Line;
-use constant DEVEL_MODE => 0;
+use constant DEVEL_MODE => 0;
+use constant EMPTY_STRING => q{};
+use constant SPACE => q{ };
# The Perl::Tidy::VerticalAligner package collects output lines and
# attempts to line up certain common tokens, such as => and #, which are
#
# The sub valign_input collects lines into groups. When a group reaches
# the maximum possible size it is processed for alignment and output.
-# The maximum group size is reached whenerver there is a change in indentation
+# The maximum group size is reached whenever there is a change in indentation
# level, a blank line, a block comment, or an external flush call. The calling
# routine may also force a break in alignment at any time.
#
return;
}
+my %valid_LINE_keys;
+
+BEGIN {
+
+ # define valid keys in a line object
+ my @q = qw(
+ jmax
+ rtokens
+ rfields
+ rfield_lengths
+ rpatterns
+ indentation
+ leading_space_count
+ outdent_long_lines
+ list_type
+ list_seqno
+ is_hanging_side_comment
+ maximum_line_length
+ rvertical_tightness_flags
+ is_terminal_ternary
+ j_terminal_match
+ end_group
+ Kend
+ ci_level
+ level
+ level_end
+ imax_pair
+
+ ralignments
+ );
+
+ @valid_LINE_keys{@q} = (1) x scalar(@q);
+}
+
BEGIN {
# Define the fixed indexes for variables in $self, which is an array
return;
}
+sub check_keys {
+ my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
+
+ # Check the keys of a hash:
+ # $rtest = ref to hash to test
+ # $rvalid = ref to hash with valid keys
+
+ # $msg = a message to write in case of error
+ # $exact_match defines the type of check:
+ # = false: test hash must not have unknown key
+ # = true: test hash must have exactly same keys as known hash
+ my @unknown_keys =
+ grep { !exists $rvalid->{$_} } keys %{$rtest};
+ my @missing_keys =
+ grep { !exists $rtest->{$_} } keys %{$rvalid};
+ my $error = @unknown_keys;
+ if ($exact_match) { $error ||= @missing_keys }
+ if ($error) {
+ local $LIST_SEPARATOR = ')(';
+ my @expected_keys = sort keys %{$rvalid};
+ @unknown_keys = sort @unknown_keys;
+ Fault(<<EOM);
+------------------------------------------------------------------------
+Program error detected checking hash keys
+Message is: '$msg'
+Expected keys: (@expected_keys)
+Unknown key(s): (@unknown_keys)
+Missing key(s): (@missing_keys)
+------------------------------------------------------------------------
+EOM
+ }
+ return;
+} ## end sub check_keys
+
sub new {
my ( $class, @args ) = @_;
# Batch of lines being collected
$self->[_rgroup_lines_] = [];
$self->[_group_level_] = 0;
- $self->[_group_type_] = "";
+ $self->[_group_type_] = EMPTY_STRING;
$self->[_group_maximum_line_length_] = undef;
$self->[_zero_count_] = 0;
$self->[_comment_leading_space_count_] = 0;
# flush() is the external call to completely empty the pipeline.
my ($self) = @_;
- # push things out the pipline...
+ # push things out the pipeline...
# push out any current group lines
$self->_flush_group_lines();
my ($self) = @_;
$self->[_rgroup_lines_] = [];
- $self->[_group_type_] = "";
+ $self->[_group_type_] = EMPTY_STRING;
$self->[_zero_count_] = 0;
$self->[_comment_leading_space_count_] = 0;
$self->[_last_leading_space_count_] = 0;
}
sub get_input_stream_name {
- my $input_stream_name = "";
+ my $input_stream_name = EMPTY_STRING;
if ($logger_object) {
$input_stream_name = $logger_object->get_input_stream_name();
}
@is_closing_token{@q} = (1) x scalar(@q);
}
+#--------------------------------------------
+# VTFLAGS: Vertical tightness types and flags
+#--------------------------------------------
+# Vertical tightness is controlled by a 'type' and associated 'flags' for each
+# line. These values are set by sub Formatter::set_vertical_tightness_flags.
+# These are defined as follows:
+
+# Vertical Tightness Line Type Codes:
+# Type 0, no vertical tightness condition
+# Type 1, last token of this line is a non-block opening token
+# Type 2, first token of next line is a non-block closing
+# Type 3, isolated opening block brace
+# type 4, isolated closing block brace
+
+# Opening token flag values are the vertical tightness flags
+# 0 do not join with next line
+# 1 just one join per line
+# 2 any number of joins
+
+# Closing token flag values indicate spacing:
+# 0 = no space added before closing token
+# 1 = single space added before closing token
+
sub valign_input {
- # Place one line in the current vertical group.
+ #---------------------------------------------------------------------
+ # This is the front door of the vertical aligner. On each call
+ # we receive one line of specially marked text for vertical alignment.
+ # We compare the line with the current group, and either:
+ # - the line joins the current group if alignments match, or
+ # - the current group is flushed and a new group is started otherwise
+ #---------------------------------------------------------------------
#
# The key input parameters describing each line are:
# $level = indentation level of this line
# side comments. Tabs in these fields can mess up the column counting.
# The log file warns the user if there are any such tabs.
- my ( $self, $rline_hash ) = @_;
-
- my $level = $rline_hash->{level};
- my $level_end = $rline_hash->{level_end};
- my $indentation = $rline_hash->{indentation};
- my $list_seqno = $rline_hash->{list_seqno};
- my $outdent_long_lines = $rline_hash->{outdent_long_lines};
- my $is_terminal_ternary = $rline_hash->{is_terminal_ternary};
- my $rvertical_tightness_flags = $rline_hash->{rvertical_tightness_flags};
- my $break_alignment_before = $rline_hash->{break_alignment_before};
- my $break_alignment_after = $rline_hash->{break_alignment_after};
- my $Kend = $rline_hash->{Kend};
- my $ci_level = $rline_hash->{ci_level};
- my $maximum_line_length = $rline_hash->{maximum_line_length};
- my $forget_side_comment = $rline_hash->{forget_side_comment};
- my $rline_alignment = $rline_hash->{rline_alignment};
+ my ( $self, $rcall_hash ) = @_;
+
+ # Unpack the call args. This form is significantly faster than getting them
+ # one-by-one.
+ my (
+
+ $Kend,
+ $break_alignment_after,
+ $break_alignment_before,
+ $ci_level,
+ $forget_side_comment,
+ $indentation,
+ $is_terminal_ternary,
+ $level,
+ $level_end,
+ $list_seqno,
+ $maximum_line_length,
+ $outdent_long_lines,
+ $rline_alignment,
+ $rvertical_tightness_flags,
+
+ ) =
+
+ @{$rcall_hash}{
+ qw(
+ Kend
+ break_alignment_after
+ break_alignment_before
+ ci_level
+ forget_side_comment
+ indentation
+ is_terminal_ternary
+ level
+ level_end
+ list_seqno
+ maximum_line_length
+ outdent_long_lines
+ rline_alignment
+ rvertical_tightness_flags
+ )
+ };
my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
@{$rline_alignment};
}
}
- # do not join an opening block brace with an unbalanced line
- # unless requested with a flag value of 2
+ # do not join an opening block brace (type 3, see VTFLAGS)
+ # with an unbalanced line unless requested with a flag value of 2
if ( $cached_line_type == 3
&& !$self->group_line_count()
&& $cached_line_opening_flag < 2
# --------------------------------------------------------------------
# Collect outdentable block COMMENTS
# --------------------------------------------------------------------
- my $is_blank_line = "";
if ( $self->[_group_type_] eq 'COMMENT' ) {
- if (
- (
- $is_block_comment
- && $outdent_long_lines
- && $leading_space_count ==
- $self->[_comment_leading_space_count_]
- )
- || $is_blank_line
- )
+ if ( $is_block_comment
+ && $outdent_long_lines
+ && $leading_space_count == $self->[_comment_leading_space_count_] )
{
# Note that for a comment group we are not storing a line
my $rgroup_lines = $self->[_rgroup_lines_];
if ( $break_alignment_before && @{$rgroup_lines} ) {
- $rgroup_lines->[-1]->set_end_group(1);
+ $rgroup_lines->[-1]->{'end_group'} = 1;
}
# --------------------------------------------------------------------
$self->[_zero_count_]++;
if ( @{$rgroup_lines}
- && !get_recoverable_spaces( $rgroup_lines->[0]->get_indentation() )
- )
+ && !get_recoverable_spaces( $rgroup_lines->[0]->{'indentation'} ) )
{
# flush the current group if it has some aligned columns..
# or we haven't seen a comment lately
- if ( $rgroup_lines->[0]->get_jmax() > 1
+ if ( $rgroup_lines->[0]->{'jmax'} > 1
|| $self->[_zero_count_] > 3 )
{
$self->_flush_group_lines();
+
+ # Update '$rgroup_lines' - it will become a ref to empty array.
+ # This allows avoiding a call to get_group_line_count below.
+ $rgroup_lines = $self->[_rgroup_lines_];
}
}
# start new COMMENT group if this comment may be outdented
if ( $is_block_comment
&& $outdent_long_lines
- && !$self->group_line_count() )
+ && !@{$rgroup_lines} )
{
$self->[_group_type_] = 'COMMENT';
$self->[_comment_leading_space_count_] = $leading_space_count;
# just write this line directly if no current group, no side comment,
# and no space recovery is needed.
- if ( !$self->group_line_count()
+ if ( !@{$rgroup_lines}
&& !get_recoverable_spaces($indentation) )
{
if ( ( $jmax == 0 ) || ( $rtokens->[ $jmax - 1 ] ne '#' ) ) {
$jmax += 1;
$rtokens->[ $jmax - 1 ] = '#';
- $rfields->[$jmax] = '';
+ $rfields->[$jmax] = EMPTY_STRING;
$rfield_lengths->[$jmax] = 0;
$rpatterns->[$jmax] = '#';
}
# --------------------------------------------------------------------
# create an object to hold this line
# --------------------------------------------------------------------
+
+ # The hash keys below must match the list of keys in %valid_LINE_keys.
+ # Values in this hash are accessed directly, except for 'ralignments',
+ # rather than with get/set calls for efficiency.
my $new_line = Perl::Tidy::VerticalAligner::Line->new(
{
jmax => $jmax,
leading_space_count => $leading_space_count,
outdent_long_lines => $outdent_long_lines,
list_seqno => $list_seqno,
- list_type => "",
+ list_type => EMPTY_STRING,
is_hanging_side_comment => $is_hanging_side_comment,
rvertical_tightness_flags => $rvertical_tightness_flags,
is_terminal_ternary => $is_terminal_ternary,
level_end => $level_end,
imax_pair => -1,
maximum_line_length => $maximum_line_length,
+
+ ralignments => [],
}
);
+ DEVEL_MODE
+ && check_keys( $new_line, \%valid_LINE_keys,
+ "Checking line keys at line definition", 1 );
+
# --------------------------------------------------------------------
# Decide if this is a simple list of items.
# We use this to be less restrictive in deciding what to align.
# the coding.
my ( $new_line, $old_line ) = @_;
- my $jmax = $new_line->get_jmax();
+ my $jmax = $new_line->{'jmax'};
# must be 2 fields
return 0 unless $jmax == 1;
- my $rtokens = $new_line->get_rtokens();
+ my $rtokens = $new_line->{'rtokens'};
# the second field must be a comment
return 0 unless $rtokens->[0] eq '#';
- my $rfields = $new_line->get_rfields();
+ my $rfields = $new_line->{'rfields'};
# the first field must be empty
return 0 unless $rfields->[0] =~ /^\s*$/;
# the current line must have fewer fields
- my $maximum_field_index = $old_line->get_jmax();
+ my $maximum_field_index = $old_line->{'jmax'};
return 0
unless $maximum_field_index > $jmax;
# looks ok..
- my $rpatterns = $new_line->get_rpatterns();
- my $rfield_lengths = $new_line->get_rfield_lengths();
+ my $rpatterns = $new_line->{'rpatterns'};
+ my $rfield_lengths = $new_line->{'rfield_lengths'};
+
+ $new_line->{'is_hanging_side_comment'} = 1;
- $new_line->set_is_hanging_side_comment(1);
- $jmax = $maximum_field_index;
- $new_line->set_jmax($jmax);
+ $jmax = $maximum_field_index;
+ $new_line->{'jmax'} = $jmax;
$rfields->[$jmax] = $rfields->[1];
$rfield_lengths->[$jmax] = $rfield_lengths->[1];
$rtokens->[ $jmax - 1 ] = $rtokens->[0];
$rpatterns->[ $jmax - 1 ] = $rpatterns->[0];
+
foreach my $j ( 1 .. $jmax - 1 ) {
- $rfields->[$j] = '';
+ $rfields->[$j] = EMPTY_STRING;
$rfield_lengths->[$j] = 0;
- $rtokens->[ $j - 1 ] = "";
- $rpatterns->[ $j - 1 ] = "";
+ $rtokens->[ $j - 1 ] = EMPTY_STRING;
+ $rpatterns->[ $j - 1 ] = EMPTY_STRING;
}
return 1;
}
# of the field separators are commas or comma-arrows (except for the
# trailing #)
- my $rtokens = $line->get_rtokens();
+ my $rtokens = $line->{'rtokens'};
my $test_token = $rtokens->[0];
my ( $raw_tok, $lev, $tag, $tok_count ) =
decode_alignment_token($test_token);
if ( $is_comma_token{$raw_tok} ) {
my $list_type = $test_token;
- my $jmax = $line->get_jmax();
+ my $jmax = $line->{'jmax'};
foreach ( 1 .. $jmax - 2 ) {
( $raw_tok, $lev, $tag, $tok_count ) =
decode_alignment_token( $rtokens->[$_] );
if ( !$is_comma_token{$raw_tok} ) {
- $list_type = "";
+ $list_type = EMPTY_STRING;
last;
}
}
- $line->set_list_type($list_type);
+ $line->{'list_type'} = $list_type;
}
return;
}
}
my $jmax = @{$rfields} - 1;
- my $rfields_old = $old_line->get_rfields();
+ my $rfields_old = $old_line->{'rfields'};
- my $rpatterns_old = $old_line->get_rpatterns();
- my $rtokens_old = $old_line->get_rtokens();
- my $maximum_field_index = $old_line->get_jmax();
+ my $rpatterns_old = $old_line->{'rpatterns'};
+ my $rtokens_old = $old_line->{'rtokens'};
+ my $maximum_field_index = $old_line->{'jmax'};
# look for the question mark after the :
my ($jquestion);
my $depth_question;
- my $pad = "";
+ my $pad = EMPTY_STRING;
my $pad_length = 0;
foreach my $j ( 0 .. $maximum_field_index - 1 ) {
my $tok = $rtokens_old->[$j];
$jquestion = $j;
if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
$pad_length = length($1);
- $pad = " " x $pad_length;
+ $pad = SPACE x $pad_length;
}
else {
return; # shouldn't happen
my @field_lengths = @{$rfield_lengths};
EXPLAIN_TERNARY && do {
- local $" = '><';
+ local $LIST_SEPARATOR = '><';
print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n";
print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n";
print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
# insert appropriate number of empty fields
- splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
- splice( @field_lengths, 1, 0, (0) x $jadd ) if $jadd;
+ splice( @fields, 1, 0, (EMPTY_STRING) x $jadd ) if $jadd;
+ splice( @field_lengths, 1, 0, (0) x $jadd ) if $jadd;
}
# handle sub-case of first field just equal to leading colon.
# leading token and inserting appropriate number of empty fields
splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
- splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
- splice( @field_lengths, 1, 0, (0) x $jadd ) if $jadd;
+ splice( @fields, 1, 0, (EMPTY_STRING) x $jadd ) if $jadd;
+ splice( @field_lengths, 1, 0, (0) x $jadd ) if $jadd;
}
}
$jadd = $jquestion + 1;
$fields[0] = $pad . $fields[0];
$field_lengths[0] = $pad_length + $field_lengths[0];
- splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
- splice( @field_lengths, 0, 0, (0) x $jadd ) if $jadd;
+ splice( @fields, 0, 0, (EMPTY_STRING) x $jadd ) if $jadd;
+ splice( @field_lengths, 0, 0, (0) x $jadd ) if $jadd;
}
EXPLAIN_TERNARY && do {
- local $" = '><';
+ local $LIST_SEPARATOR = '><';
print STDOUT "MODIFIED TOKENS=<@tokens>\n";
print STDOUT "MODIFIED PATTERNS=<@patterns>\n";
print STDOUT "MODIFIED FIELDS=<@fields>\n";
}
# check for balanced else block following if/elsif/unless
- my $rfields_old = $old_line->get_rfields();
+ my $rfields_old = $old_line->{'rfields'};
# TBD: add handling for 'case'
return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
# probably: "else # side_comment"
else { return }
- my $rpatterns_old = $old_line->get_rpatterns();
- my $rtokens_old = $old_line->get_rtokens();
- my $maximum_field_index = $old_line->get_jmax();
+ my $rpatterns_old = $old_line->{'rpatterns'};
+ my $rtokens_old = $old_line->{'rtokens'};
+ my $maximum_field_index = $old_line->{'jmax'};
# be sure the previous if/elsif is followed by an opening paren
my $jparen = 0;
my $jadd = $jbrace - $jparen;
splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
- splice( @{$rfields}, 1, 0, ('') x $jadd );
+ splice( @{$rfields}, 1, 0, (EMPTY_STRING) x $jadd );
splice( @{$rfield_lengths}, 1, 0, (0) x $jadd );
# force a flush after this line if it does not follow a case
my %is_closing_block_type;
BEGIN {
- @_ = qw< } ] >;
- @is_closing_block_type{@_} = (1) x scalar(@_);
+ my @q = qw< } ] >;
+ @is_closing_block_type{@q} = (1) x scalar(@q);
}
+# This is a flag for testing alignment by sub sweep_left_to_right only.
+# This test can help find problems with the alignment logic.
+# This flag should normally be zero.
+use constant TEST_SWEEP_ONLY => 0;
+
+use constant EXPLAIN_CHECK_MATCH => 0;
+
sub check_match {
# See if the current line matches the current vertical alignment group.
# $prev_line = the line just before $new_line
# returns a flag and a value as follows:
- # return (0, $imax_align) if the line does not match
- # return (1, $imax_align) if the line matches but does not fit
- # return (2, $imax_align) if the line matches and fits
+ # return (0, $imax_align) if the line does not match
+ # return (1, $imax_align) if the line matches but does not fit
+ # return (2, $imax_align) if the line matches and fits
+
+ use constant NO_MATCH => 0;
+ use constant MATCH_NO_FIT => 1;
+ use constant MATCH_AND_FIT => 2;
+
+ my $return_value;
# Returns '$imax_align' which is the index of the maximum matching token.
# It will be used in the subsequent left-to-right sweep to align as many
my $imax_align = -1;
# variable $GoToMsg explains reason for no match, for debugging
- my $GoToMsg = "";
- use constant EXPLAIN_CHECK_MATCH => 0;
-
- # This is a flag for testing alignment by sub sweep_left_to_right only.
- # This test can help find problems with the alignment logic.
- # This flag should normally be zero.
- use constant TEST_SWEEP_ONLY => 0;
+ my $GoToMsg = EMPTY_STRING;
- my $jmax = $new_line->get_jmax();
- my $maximum_field_index = $base_line->get_jmax();
+ my $jmax = $new_line->{'jmax'};
+ my $maximum_field_index = $base_line->{'jmax'};
my $jlimit = $jmax - 2;
if ( $jmax > $maximum_field_index ) {
$jlimit = $maximum_field_index - 2;
}
- if ( $new_line->get_is_hanging_side_comment() ) {
+ if ( $new_line->{'is_hanging_side_comment'} ) {
# HSC's can join the group if they fit
}
# A group with hanging side comments ends with the first non hanging
# side comment.
- if ( $base_line->get_is_hanging_side_comment() ) {
- $GoToMsg = "end of hanging side comments";
- goto NO_MATCH;
+ if ( $base_line->{'is_hanging_side_comment'} ) {
+ $GoToMsg = "end of hanging side comments";
+ $return_value = NO_MATCH;
}
+ else {
- # The number of tokens that this line shares with the previous line
- # has been stored with the previous line. This value was calculated
- # and stored by sub 'match_line_pair'.
- $imax_align = $prev_line->get_imax_pair();
+ # The number of tokens that this line shares with the previous
+ # line has been stored with the previous line. This value was
+ # calculated and stored by sub 'match_line_pair'.
+ $imax_align = $prev_line->{'imax_pair'};
- if ( $imax_align != $jlimit ) {
- $GoToMsg = "Not all tokens match: $imax_align != $jlimit\n";
- goto NO_MATCH;
+ if ( $imax_align != $jlimit ) {
+ $GoToMsg = "Not all tokens match: $imax_align != $jlimit\n";
+ $return_value = NO_MATCH;
+ }
}
-
}
- # The tokens match, but the lines must have identical number of
- # tokens to join the group.
- if ( $maximum_field_index != $jmax ) {
- $GoToMsg = "token count differs";
- goto NO_MATCH;
- }
+ if ( !defined($return_value) ) {
- # The tokens match. Now See if there is space for this line in the
- # current group.
- if ( $self->check_fit( $new_line, $base_line ) && !TEST_SWEEP_ONLY ) {
+ # The tokens match, but the lines must have identical number of
+ # tokens to join the group.
+ if ( $maximum_field_index != $jmax ) {
+ $GoToMsg = "token count differs";
+ $return_value = NO_MATCH;
+ }
- EXPLAIN_CHECK_MATCH
- && print "match and fit, imax_align=$imax_align, jmax=$jmax\n";
- return ( 2, $jlimit );
- }
- else {
+ # The tokens match. Now See if there is space for this line in the
+ # current group.
+ elsif ( $self->check_fit( $new_line, $base_line ) && !TEST_SWEEP_ONLY )
+ {
- EXPLAIN_CHECK_MATCH
- && print "match but no fit, imax_align=$imax_align, jmax=$jmax\n";
- return ( 1, $jlimit );
+ $GoToMsg = "match and fit, imax_align=$imax_align, jmax=$jmax\n";
+ $return_value = MATCH_AND_FIT;
+ $imax_align = $jlimit;
+ }
+ else {
+ $GoToMsg = "match but no fit, imax_align=$imax_align, jmax=$jmax\n";
+ $return_value = MATCH_NO_FIT;
+ $imax_align = $jlimit;
+ }
}
- NO_MATCH:
-
EXPLAIN_CHECK_MATCH
&& print
- "no match because $GoToMsg, max match index =i $imax_align, jmax=$jmax\n";
+"returning $return_value because $GoToMsg, max match index =i $imax_align, jmax=$jmax\n";
- return ( 0, $imax_align );
+ return ( $return_value, $imax_align );
}
sub check_fit {
# return true if successful
# return false if not successful
- my $jmax = $new_line->get_jmax();
- my $leading_space_count = $new_line->get_leading_space_count();
- my $rfield_lengths = $new_line->get_rfield_lengths();
+ my $jmax = $new_line->{'jmax'};
+ my $leading_space_count = $new_line->{'leading_space_count'};
+ my $rfield_lengths = $new_line->{'rfield_lengths'};
my $padding_available = $old_line->get_available_space_on_right();
- my $jmax_old = $old_line->get_jmax();
- my $rtokens_old = $old_line->get_rtokens();
+ my $jmax_old = $old_line->{'jmax'};
+ my $rtokens_old = $old_line->{'rtokens'};
# Safety check ... only lines with equal array sizes should arrive here
# from sub check_match. So if this error occurs, look at recent changes in
}
# Save current columns in case this line does not fit.
- my @alignments = $old_line->get_alignments();
+ my @alignments = @{ $old_line->{'ralignments'} };
foreach my $alignment (@alignments) {
$alignment->save_column();
}
- my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
+ my $is_hanging_side_comment = $new_line->{'is_hanging_side_comment'};
# Loop over all alignments ...
- my $maximum_field_index = $old_line->get_jmax();
+ my $maximum_field_index = $old_line->{'jmax'};
for my $j ( 0 .. $jmax ) {
my $pad = $rfield_lengths->[$j] - $old_line->current_field_width($j);
# Revert to the starting state if does not fit
if ( $pad > $padding_available ) {
- ################################################
+ #----------------------------------------------
# Line does not fit -- revert to starting state
- ################################################
+ #----------------------------------------------
foreach my $alignment (@alignments) {
$alignment->restore_column();
}
$padding_available -= $pad;
}
- ######################################
+ #-------------------------------------
# The line fits, the match is accepted
- ######################################
+ #-------------------------------------
return 1;
}
my ($new_line) = @_;
- my $jmax = $new_line->get_jmax();
- my $rfield_lengths = $new_line->get_rfield_lengths();
- my $col = $new_line->get_leading_space_count();
+ my $jmax = $new_line->{'jmax'};
+ my $rfield_lengths = $new_line->{'rfield_lengths'};
+ my $col = $new_line->{'leading_space_count'};
+ my @alignments;
for my $j ( 0 .. $jmax ) {
$col += $rfield_lengths->[$j];
# create initial alignments for the new group
my $alignment =
Perl::Tidy::VerticalAligner::Alignment->new( { column => $col } );
- $new_line->set_alignment( $j, $alignment );
+ push @alignments, $alignment;
}
+ $new_line->{'ralignments'} = \@alignments;
return;
}
sub copy_old_alignments {
my ( $new_line, $old_line ) = @_;
- my @new_alignments = $old_line->get_alignments();
- $new_line->set_alignments(@new_alignments);
+ my @new_alignments = @{ $old_line->{'ralignments'} };
+ $new_line->{'ralignments'} = \@new_alignments;
return;
}
sub dump_array {
# debug routine to dump array contents
- local $" = ')(';
+ local $LIST_SEPARATOR = ')(';
print STDOUT "(@_)\n";
return;
}
"APPEND0: _flush_group_lines called from $a $b $c lines=$nlines, type=$group_type \n";
};
- ############################################
+ #-------------------------------------------
# Section 1: Handle a group of COMMENT lines
- ############################################
+ #-------------------------------------------
if ( $group_type eq 'COMMENT' ) {
$self->_flush_comment_lines();
return;
}
- #########################################################################
+ #------------------------------------------------------------------------
# Section 2: Handle line(s) of CODE. Most of the actual work of vertical
# aligning happens here in the following steps:
- #########################################################################
+ #------------------------------------------------------------------------
# STEP 1: Remove most unmatched tokens. They block good alignments.
my ( $max_lev_diff, $saw_side_comment ) =
# Otherwise, assume the next line has the level of the end of last line.
# This fixes case c008.
else {
- my $level_end = $rgroup_lines->[-1]->get_level_end();
+ my $level_end = $rgroup_lines->[-1]->{'level_end'};
$extra_indent_ok = $group_level > $level_end;
}
}
# STEP 6: Output the lines.
# All lines in this group have the same leading spacing and maximum line
# length
- my $group_leader_length = $rgroup_lines->[0]->get_leading_space_count();
- my $group_maximum_line_length =
- $rgroup_lines->[0]->get_maximum_line_length();
+ my $group_leader_length = $rgroup_lines->[0]->{'leading_space_count'};
+ my $group_maximum_line_length = $rgroup_lines->[0]->{'maximum_line_length'};
foreach my $line ( @{$rgroup_lines} ) {
$self->valign_output_step_A(
# Let the formatter know that this object has been processed and any
# recoverable spaces have been handled. This is needed for setting the
# closing paren location in -lp mode.
- my $object = $rgroup_lines->[0]->get_indentation();
+ my $object = $rgroup_lines->[0]->{'indentation'};
if ( ref($object) ) { $object->set_recoverable_spaces(0) }
$self->initialize_for_new_group();
my $line_0 = $rall_lines->[$jbeg];
my $line_1 = $rall_lines->[$jend];
- my $imax_pair = $line_1->get_imax_pair();
+ my $imax_pair = $line_1->{'imax_pair'};
if ( $imax_pair > $imax_align ) { $imax_align = $imax_pair }
## flag for possible future use:
## my $is_isolated_pair = $imax_pair < 0
## && ( $jbeg == 0
- ## || $rall_lines->[ $jbeg - 1 ]->get_imax_pair() < 0 );
+ ## || $rall_lines->[ $jbeg - 1 ]->{'imax_pair'} < 0 );
my $imax_prev =
- $jbeg > 0 ? $rall_lines->[ $jbeg - 1 ]->get_imax_pair() : -1;
+ $jbeg > 0 ? $rall_lines->[ $jbeg - 1 ]->{'imax_pair'} : -1;
my ( $is_marginal, $imax_align_fix ) =
is_marginal_match( $line_0, $line_1, $grp_level, $imax_align,
# Unset the _end_group flag for the last line if it it set because it
# is not needed and can causes problems for -lp formatting
- $rall_lines->[-1]->set_end_group(0);
+ $rall_lines->[-1]->{'end_group'} = 0;
# Loop over all lines ...
my $jline = -1;
# Start a new subgroup if necessary
if ( !$group_line_count ) {
add_to_rgroup($jline);
- if ( $new_line->get_end_group() ) {
+ if ( $new_line->{'end_group'} ) {
end_rgroup(-1);
}
next;
}
- my $j_terminal_match = $new_line->get_j_terminal_match();
+ my $j_terminal_match = $new_line->{'j_terminal_match'};
my ( $jbeg, $jend ) = get_rgroup_jrange();
if ( !defined($jbeg) ) {
#
# If this were not desired, the next step could be skipped.
# -------------------------------------------------------------
- if ( $new_line->get_is_hanging_side_comment() ) {
+ if ( $new_line->{'is_hanging_side_comment'} ) {
join_hanging_comment( $new_line, $base_line );
}
# If this line has no matching tokens, then flush out the lines
# BEFORE this line unless both it and the previous line have side
- # comments. This prevents this line from pushing side coments out
+ # comments. This prevents this line from pushing side comments out
# to the right.
- elsif ( $new_line->get_jmax() == 1 ) {
+ elsif ( $new_line->{'jmax'} == 1 ) {
# There are no matching tokens, so now check side comments.
# Programming note: accessing arrays with index -1 is
# risky in Perl, but we have verified there is at least one
# line in the group and that there is at least one field.
my $prev_comment =
- $rall_lines->[ $jline - 1 ]->get_rfields()->[-1];
- my $side_comment = $new_line->get_rfields()->[-1];
+ $rall_lines->[ $jline - 1 ]->{'rfields'}->[-1];
+ my $side_comment = $new_line->{'rfields'}->[-1];
end_rgroup(-1) unless ( $side_comment && $prev_comment );
}
}
# do not let sweep_left_to_right change an isolated 'else'
- if ( !$new_line->get_is_terminal_ternary() ) {
+ if ( !$new_line->{'is_terminal_ternary'} ) {
block_penultimate_match();
}
}
}
# end the group if we know we cannot match next line.
- elsif ( $new_line->get_end_group() ) {
+ elsif ( $new_line->{'end_group'} ) {
end_rgroup(-1);
}
} ## end loop over lines
# two isolated (list) lines
# imax_min = number of common alignment tokens
# Return:
- # $pad_max = maximum suggested pad distnce
+ # $pad_max = maximum suggested pad distance
# = 0 if alignment not recommended
# Note that this is only for two lines which do not have alignment tokens
# in common with any other lines. It is intended for lists, but it might
# 'VARCHAR', DBI::SQL_VARCHAR, undef, "'", "'", undef, 0, 1,
# 1, 0, 0, 0, undef, 0, 0
# ];
- my $rfield_lengths = $line->get_rfield_lengths();
- my $rfield_lengths_m = $line_m->get_rfield_lengths();
+ my $rfield_lengths = $line->{'rfield_lengths'};
+ my $rfield_lengths_m = $line_m->{'rfield_lengths'};
# Safety check - shouldn't happen
return 0
my $lensum_m = 0;
my $lensum = 0;
- for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
+ foreach my $i ( 0 .. $imax_min ) {
$lensum_m += $rfield_lengths_m->[$i];
$lensum += $rfield_lengths->[$i];
}
$lensum >= $lensum_m ? ( $lensum_m, $lensum ) : ( $lensum, $lensum_m );
my $patterns_match;
- if ( $line_m->get_list_type() && $line->get_list_type() ) {
+ if ( $line_m->{'list_type'} && $line->{'list_type'} ) {
$patterns_match = 1;
- my $rpatterns_m = $line_m->get_rpatterns();
- my $rpatterns = $line->get_rpatterns();
- for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
+ my $rpatterns_m = $line_m->{'rpatterns'};
+ my $rpatterns = $line->{'rpatterns'};
+ foreach my $i ( 0 .. $imax_min ) {
my $pat = $rpatterns->[$i];
my $pat_m = $rpatterns_m->[$i];
if ( $pat ne $pat_m ) { $patterns_match = 0; last }
my $ng_max = @{$rgroups} - 1;
return unless ( $ng_max > 0 );
- ############################################################################
+ #---------------------------------------------------------------------
# Step 1: Loop over groups to find all common leading alignment tokens
- ############################################################################
+ #---------------------------------------------------------------------
my $line;
my $rtokens;
( $jbeg, $jend, $istop ) = @{$item};
$line = $rlines->[$jbeg];
- $rtokens = $line->get_rtokens();
- $imax = $line->get_jmax() - 2;
+ $rtokens = $line->{'rtokens'};
+ $imax = $line->{'jmax'} - 2;
$istop = -1 unless ( defined($istop) );
$istop = $imax if ( $istop > $imax );
# Special treatment of two one-line groups isolated from other lines,
# unless they form a simple list or a terminal match. Otherwise the
# alignment can look strange in some cases.
- my $list_type = $rlines->[$jbeg]->get_list_type();
+ my $list_type = $rlines->[$jbeg]->{'list_type'};
if (
$jend == $jbeg
&& $jend_m == $jbeg_m
&& ( $ng == 1 || $istop_mm < 0 )
&& ( $ng == $ng_max || $istop < 0 )
- && !$line->get_j_terminal_match()
+ && !$line->{'j_terminal_match'}
# Only do this for imperfect matches. This is normally true except
# when two perfect matches cannot form a group because the line
}
return unless @icommon;
- ###########################################################
+ #----------------------------------------------------------
# Step 2: Reorder and consolidate the list into a task list
- ###########################################################
+ #----------------------------------------------------------
# We have to work first from lowest token index to highest, then by group,
# sort our list first on token index then group number
push @todo, [ $i, $ng_beg, $ng_end, $raw_tok, $lev ];
}
- ###############################
+ #------------------------------
# Step 3: Execute the task list
- ###############################
+ #------------------------------
do_left_to_right_sweep( $rlines, $rgroups, \@todo, \%max_move, $short_pad,
$group_level );
return;
# $blocking_level[$nj is the level at a match failure between groups
# $ng-1 and $ng
my @blocking_level;
- my $group_list_type = $rlines->[0]->get_list_type();
+ my $group_list_type = $rlines->[0]->{'list_type'};
my $move_to_common_column = sub {
# (the first line). All of the rest will be changed
# automatically.
my $line = $rlines->[$ix_beg];
- my $jmax = $line->get_jmax();
+ my $jmax = $line->{'jmax'};
# the maximum space without exceeding the line length:
my $avail = $line->get_available_space_on_right();
# my $unknown6 = pack( "VV", 0x00, 0x1000 );
# On the other hand, it is okay to keep matching at the same
- # level such as in a simple list of commas and/or fat arrors.
+ # level such as in a simple list of commas and/or fat commas.
my $is_blocked = defined( $blocking_level[$ng] )
&& $lev > $blocking_level[$ng];
# Do not let one or two lines with a **different number of
# alignments** open up a big gap in a large block. For
# example, we will prevent something like this, where the first
- # line prys open the rest:
+ # line pries open the rest:
# $worksheet->write( "B7", "http://www.perl.com", undef, $format );
# $worksheet->write( "C7", "", $format );
return unless ( defined($line_obj) && defined($ridel) && @{$ridel} );
- my $jmax_old = $line_obj->get_jmax();
- my $rfields_old = $line_obj->get_rfields();
- my $rfield_lengths_old = $line_obj->get_rfield_lengths();
- my $rpatterns_old = $line_obj->get_rpatterns();
- my $rtokens_old = $line_obj->get_rtokens();
- my $j_terminal_match = $line_obj->get_j_terminal_match();
+ my $jmax_old = $line_obj->{'jmax'};
+ my $rfields_old = $line_obj->{'rfields'};
+ my $rfield_lengths_old = $line_obj->{'rfield_lengths'};
+ my $rpatterns_old = $line_obj->{'rpatterns'};
+ my $rtokens_old = $line_obj->{'rtokens'};
+ my $j_terminal_match = $line_obj->{'j_terminal_match'};
use constant EXPLAIN_DELETE_SELECTED => 0;
- local $" = '> <';
+ local $LIST_SEPARATOR = '> <';
EXPLAIN_DELETE_SELECTED && print <<EOM;
delete indexes: <@{$ridel}>
old jmax: $jmax_old
my %delete_me;
@delete_me{ @{$ridel} } = (1) x scalar( @{$ridel} );
- my $pattern = $rpatterns_old->[0];
- my $field = $rfields_old->[0];
- my $field_length = $rfield_lengths_old->[0];
- push @{$rfields_new}, $field;
- push @{$rfield_lengths_new}, $field_length;
- push @{$rpatterns_new}, $pattern;
+ my $pattern_0 = $rpatterns_old->[0];
+ my $field_0 = $rfields_old->[0];
+ my $field_length_0 = $rfield_lengths_old->[0];
+ push @{$rfields_new}, $field_0;
+ push @{$rfield_lengths_new}, $field_length_0;
+ push @{$rpatterns_new}, $pattern_0;
# Loop to either copy items or concatenate fields and patterns
my $jmin_del;
- for ( my $j = 0 ; $j < $jmax_old ; $j++ ) {
+ foreach my $j ( 0 .. $jmax_old - 1 ) {
my $token = $rtokens_old->[$j];
my $field = $rfields_old->[ $j + 1 ];
my $field_length = $rfield_lengths_old->[ $j + 1 ];
#f 0 1 2 3 <- field and pattern
my $jmax_new = @{$rfields_new} - 1;
- $line_obj->set_rtokens($rtokens_new);
- $line_obj->set_rpatterns($rpatterns_new);
- $line_obj->set_rfields($rfields_new);
- $line_obj->set_rfield_lengths($rfield_lengths_new);
- $line_obj->set_jmax($jmax_new);
+ $line_obj->{'rtokens'} = $rtokens_new;
+ $line_obj->{'rpatterns'} = $rpatterns_new;
+ $line_obj->{'rfields'} = $rfields_new;
+ $line_obj->{'rfield_lengths'} = $rfield_lengths_new;
+ $line_obj->{'jmax'} = $jmax_new;
# The value of j_terminal_match will be incorrect if we delete tokens prior
# to it. We will have to give up on aligning the terminal tokens if this
# happens.
if ( defined($j_terminal_match) && $jmin_del <= $j_terminal_match ) {
- $line_obj->set_j_terminal_match(undef);
+ $line_obj->{'j_terminal_match'} = undef;
}
# update list type -
- if ( $line_obj->get_list_seqno() ) {
+ if ( $line_obj->{'list_seqno'} ) {
## This works, but for efficiency see if we need to make a change:
## decide_if_list($line_obj);
# An existing list will still be a list but with possibly different
# leading token
- my $old_list_type = $line_obj->get_list_type();
- my $new_list_type = "";
+ my $old_list_type = $line_obj->{'list_type'};
+ my $new_list_type = EMPTY_STRING;
if ( $rtokens_new->[0] =~ /^(=>|,)/ ) {
$new_list_type = $rtokens_new->[0];
}
return @{ $decoded_token{$tok} };
}
- my ( $raw_tok, $lev, $tag, $tok_count ) = ( $tok, 0, "", 1 );
+ my ( $raw_tok, $lev, $tag, $tok_count ) = ( $tok, 0, EMPTY_STRING, 1 );
if ( $tok =~ /^(\D+)(\d+)([^\.]*)(\.(\d+))?$/ ) {
$raw_tok = $1;
$lev = $2;
}
- # This flag is for testing only and should normally be zero.
- use constant TEST_DELETE_NULL => 0;
-
sub delete_unmatched_tokens {
my ( $rlines, $group_level ) = @_;
- # This is a preliminary step in vertical alignment in which we remove
- # as many obviously un-needed alignment tokens as possible. This will
- # prevent them from interfering with the final alignment.
+ # This is a important first step in vertical alignment in which
+ # we remove as many obviously un-needed alignment tokens as possible.
+ # This will prevent them from interfering with the final alignment.
- # These are the return values
+ # Returns:
my $max_lev_diff = 0; # used to avoid a call to prune_tree
my $saw_side_comment = 0; # used to avoid a call for side comments
# Handle a single line
if ( @{$rlines} == 1 ) {
my $line = $rlines->[0];
- my $jmax = $line->get_jmax();
- my $length = $line->get_rfield_lengths()->[$jmax];
+ my $jmax = $line->{'jmax'};
+ my $length = $line->{'rfield_lengths'}->[$jmax];
$saw_side_comment = $length > 0;
return ( $max_lev_diff, $saw_side_comment );
}
- my $has_terminal_match = $rlines->[-1]->get_j_terminal_match();
+ my $has_terminal_match = $rlines->[-1]->{'j_terminal_match'};
# ignore hanging side comments in these operations
- my @filtered = grep { !$_->get_is_hanging_side_comment() } @{$rlines};
+ my @filtered = grep { !$_->{'is_hanging_side_comment'} } @{$rlines};
my $rnew_lines = \@filtered;
$saw_side_comment = @filtered != @{$rlines};
my @equals_info;
my @line_info;
- my %is_good_tok;
- # create a hash of tokens for each line
+ #------------------------------------------------------------
+ # Loop to create a hash of alignment token info for each line
+ #------------------------------------------------------------
my $rline_hashes = [];
foreach my $line ( @{$rnew_lines} ) {
my $rhash = {};
- my $rtokens = $line->get_rtokens();
- my $rpatterns = $line->get_rpatterns();
+ my $rtokens = $line->{'rtokens'};
+ my $rpatterns = $line->{'rpatterns'};
my $i = 0;
my ( $i_eq, $tok_eq, $pat_eq );
my ( $lev_min, $lev_max );
}
else {
if ( !$saw_side_comment ) {
- my $length = $line->get_rfield_lengths()->[ $i + 1 ];
+ my $length = $line->{'rfield_lengths'}->[ $i + 1 ];
$saw_side_comment ||= $length;
}
}
}
}
- # compare each line pair and record matches
+ #----------------------------------------------------
+ # Loop to compare each line pair and remember matches
+ #----------------------------------------------------
my $rtok_hash = {};
my $nr = 0;
- for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) {
+ foreach my $jl ( 0 .. $jmax - 1 ) {
my $nl = $nr;
$nr = 0;
my $jr = $jl + 1;
my $rhash_l = $rline_hashes->[$jl];
my $rhash_r = $rline_hashes->[$jr];
- my $count = 0; # UNUSED NOW?
- my $ntoks = 0;
foreach my $tok ( keys %{$rhash_l} ) {
- $ntoks++;
if ( defined( $rhash_r->{$tok} ) ) {
- if ( $tok ne '#' ) { $count++; }
my $il = $rhash_l->{$tok}->[0];
my $ir = $rhash_r->{$tok}->[0];
$rhash_l->{$tok}->[2] = $ir;
# Set a line break if no matching tokens between these lines
# (this is not strictly necessary now but does not hurt)
if ( $nr == 0 && $nl > 0 ) {
- $rnew_lines->[$jl]->set_end_group(1);
+ $rnew_lines->[$jl]->{'end_group'} = 1;
}
# Also set a line break if both lines have simple equals but with
if ( defined($i_eq_l) && defined($i_eq_r) ) {
# Also, do not align equals across a change in ci level
- my $ci_jump = $rnew_lines->[$jl]->get_ci_level() !=
- $rnew_lines->[$jr]->get_ci_level();
+ my $ci_jump = $rnew_lines->[$jl]->{'ci_level'} !=
+ $rnew_lines->[$jr]->{'ci_level'};
if (
$tok_eq_l eq $tok_eq_r
|| $ci_jump )
)
{
- $rnew_lines->[$jl]->set_end_group(1);
+ $rnew_lines->[$jl]->{'end_group'} = 1;
}
}
}
- # find subgroups
+ #------------------------------------------------------------
+ # Find independent subgroups of lines. Neighboring subgroups
+ # do not have a common alignment token.
+ #------------------------------------------------------------
my @subgroups;
push @subgroups, [ 0, $jmax ];
- for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) {
- if ( $rnew_lines->[$jl]->get_end_group() ) {
+ foreach my $jl ( 0 .. $jmax - 1 ) {
+ if ( $rnew_lines->[$jl]->{'end_group'} ) {
$subgroups[-1]->[1] = $jl;
push @subgroups, [ $jl + 1, $jmax ];
}
# flag to allow skipping pass 2
my $saw_large_group;
- ############################################################
+ #-----------------------------------------------------------
# PASS 1 over subgroups to remove unmatched alignment tokens
- ############################################################
+ #-----------------------------------------------------------
foreach my $item (@subgroups) {
my ( $jbeg, $jend ) = @{$item};
my $nlines = $jend - $jbeg + 1;
- ####################################################
+ #---------------------------------------------------
# Look for complete if/elsif/else and ternary blocks
- ####################################################
+ #---------------------------------------------------
# We are looking for a common '$dividing_token' like these:
my %token_line_count;
if ( $nlines > 2 ) {
- for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
+ foreach my $jj ( $jbeg .. $jend ) {
my %seen;
my $line = $rnew_lines->[$jj];
- my $rtokens = $line->get_rtokens();
+ my $rtokens = $line->{'rtokens'};
foreach my $tok ( @{$rtokens} ) {
if ( !$seen{$tok} ) {
$seen{$tok}++;
}
}
- #####################################################
- # Loop over lines to remove unwanted alignment tokens
- #####################################################
- for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
+ #-------------------------------------------------------------
+ # Loop over subgroup lines to remove unwanted alignment tokens
+ #-------------------------------------------------------------
+ foreach my $jj ( $jbeg .. $jend ) {
my $line = $rnew_lines->[$jj];
- my $rtokens = $line->get_rtokens();
+ my $rtokens = $line->{'rtokens'};
my $rhash = $rline_hashes->[$jj];
my $i_eq = $equals_info[$jj]->[0];
my @idel;
my $delete_above_level;
my $deleted_assignment_token;
- my $saw_dividing_token = "";
+ my $saw_dividing_token = EMPTY_STRING;
$saw_large_group ||= $nlines > 2 && $imax > 1;
# Loop over all alignment tokens
- for ( my $i = 0 ; $i <= $imax ; $i++ ) {
+ foreach my $i ( 0 .. $imax ) {
my $tok = $rtokens->[$i];
next if ( $tok eq '#' ); # shouldn't happen
my ( $iii, $il, $ir, $raw_tok, $lev, $tag, $tok_count ) =
@{ $rhash->{$tok} };
- #######################################################
+ #------------------------------------------------------
# Here is the basic RULE: remove an unmatched alignment
# which does not occur in the surrounding lines.
- #######################################################
+ #------------------------------------------------------
my $delete_me = !defined($il) && !defined($ir);
# Apply any user controls. Note that not all lines pass
# );
if ( defined($delete_above_level) ) {
if ( $lev > $delete_above_level ) {
- $delete_me ||= 1; #$tag;
+ $delete_me ||= 1;
}
else { $delete_above_level = undef }
}
&& $nlines == 2 );
# EXCEPTION 5: misc additional rules for commas and equals
- if ($delete_me) {
+ if ( $delete_me && $tok_count == 1 ) {
# okay to delete second and higher copies of a token
- if ( $tok_count == 1 ) {
-
- # for a comma...
- if ( $raw_tok eq ',' ) {
- # Do not delete commas before an equals
- $delete_me = 0
- if ( defined($i_eq) && $i < $i_eq );
+ # for a comma...
+ if ( $raw_tok eq ',' ) {
- # Do not delete line-level commas
- $delete_me = 0 if ( $lev <= $group_level );
- }
+ # Do not delete commas before an equals
+ $delete_me = 0
+ if ( defined($i_eq) && $i < $i_eq );
- # For an assignment at group level..
- if ( $is_assignment{$raw_tok}
- && $lev == $group_level )
- {
+ # Do not delete line-level commas
+ $delete_me = 0 if ( $lev <= $group_level );
+ }
- # Do not delete if it is the last alignment of
- # multiple tokens; this will prevent some
- # undesirable alignments
- if ( $imax > 0 && $i == $imax ) {
- $delete_me = 0;
- }
+ # For an assignment at group level..
+ if ( $is_assignment{$raw_tok}
+ && $lev == $group_level )
+ {
- # Otherwise, set a flag to delete most
- # remaining tokens
- else { $deleted_assignment_token = $raw_tok }
+ # Do not delete if it is the last alignment of
+ # multiple tokens; this will prevent some
+ # undesirable alignments
+ if ( $imax > 0 && $i == $imax ) {
+ $delete_me = 0;
}
+
+ # Otherwise, set a flag to delete most
+ # remaining tokens
+ else { $deleted_assignment_token = $raw_tok }
}
}
# Do not let a user exclusion be reactivated by above rules
$delete_me ||= !$align_ok;
- #####################################
+ #------------------------------------
# Add this token to the deletion list
- #####################################
+ #------------------------------------
if ($delete_me) {
push @idel, $i;
} # End loopover lines
} # End loop over subgroups
- #################################################
- # PASS 2 over subgroups to remove null alignments
- #################################################
-
- # This pass is only used for testing. It is helping to identify
- # alignment situations which might be improved with a future more
- # general algorithm which adds a tail matching capability.
- if (TEST_DELETE_NULL) {
- delete_null_alignments( $rnew_lines, $rline_hashes, \@subgroups )
- if ($saw_large_group);
- }
+ # End PASS 1
- # PASS 3: Construct a tree of matched lines and delete some small deeper
- # levels of tokens. They also block good alignments.
+ #----------------------------------------------------------------
+ # PASS 2: Construct a tree of matched lines and delete some small
+ # deeper levels of tokens. They also block good alignments.
+ #----------------------------------------------------------------
prune_alignment_tree($rnew_lines) if ($max_lev_diff);
- # PASS 4: compare all lines for common tokens
+ #--------------------------------------------
+ # PASS 3: compare all lines for common tokens
+ #--------------------------------------------
match_line_pairs( $rlines, $rnew_lines, \@subgroups, $group_level );
return ( $max_lev_diff, $saw_side_comment );
}
}
-sub delete_null_alignments {
- my ( $rnew_lines, $rline_hashes, $rsubgroups ) = @_;
-
- # This is an optional second pass for deleting alignment tokens which can
- # occasionally improve alignment. We look for and remove 'null
- # alignments', which are alignments that require no padding. So we can
- # 'cheat' and delete them. For example, notice the '=~' alignment in the
- # first two lines of the following code:
-
- # $sysname .= 'del' if $self->label =~ /deletion/;
- # $sysname .= 'ins' if $self->label =~ /insertion/;
- # $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq;
-
- # These '=~' tokens are already aligned because they are both the same
- # distance from the previous alignment token, the 'if'. So we can
- # eliminate them as alignments. The advantage is that in some cases, such
- # as this one, this will allow other tokens to be aligned. In this case we
- # then get the 'if' tokens to align:
-
- # $sysname .= 'del' if $self->label =~ /deletion/;
- # $sysname .= 'ins' if $self->label =~ /insertion/;
- # $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq;
-
- # The following rules for limiting this operation have been found to
- # work well and avoid problems:
-
- # Rule 1. We only consider a sequence of lines which have the same
- # sequence of alignment tokens.
-
- # Rule 2. We never eliminate the first alignment token. One reason is that
- # lines may have different leading indentation spaces, so keeping the
- # first alignment token insures that our length measurements start at
- # a well-defined point. Another reason is that nothing is gained because
- # the left-to-right sweep can always handle alignment of this token.
-
- # Rule 3. We require that the first alignment token exist in either
- # a previous line or a subsequent line. The reason is that this avoids
- # changing two-line matches which go through special logic.
-
- # Rule 4. Do not delete a token which occurs in a previous or subsequent
- # line. For example, in the above example, it was ok to eliminate the '=~'
- # token from two lines because it did not occur in a surrounding line.
- # If it did occur in a surrounding line, the result could be confusing
- # or even incorrectly aligned.
-
- # A consequence of these rules is that we only need to consider subgroups
- # with at least 3 lines and 2 alignment tokens.
-
- # The subgroup line index range
- my ( $jbeg, $jend );
-
- # Vars to keep track of the start of a current sequence of matching
- # lines.
- my $rtokens_match;
- my $rfield_lengths_match;
- my $j_match_beg;
- my $j_match_end;
- my $imax_match;
- my $rneed_pad;
-
- # Vars for a line being tested
- my $rtokens;
- my $rfield_lengths;
- my $imax;
-
- my $start_match = sub {
- my ($jj) = @_;
- $rtokens_match = $rtokens;
- $rfield_lengths_match = $rfield_lengths;
- $j_match_beg = $jj;
- $j_match_end = $jj;
- $imax_match = $imax;
- $rneed_pad = [];
- return;
- };
-
- my $add_to_match = sub {
- my ($jj) = @_;
- $j_match_end = $jj;
-
- # Keep track of any padding that would be needed for each token
- for ( my $i = 0 ; $i <= $imax ; $i++ ) {
- next if ( $rneed_pad->[$i] );
- my $length = $rfield_lengths->[$i];
- my $length_match = $rfield_lengths_match->[$i];
- if ( $length ne $length_match ) { $rneed_pad->[$i] = 1 }
- }
- return;
- };
-
- my $end_match = sub {
- return unless ( $j_match_end > $j_match_beg );
- my $nlines = $j_match_end - $j_match_beg + 1;
- my $rhash_beg = $rline_hashes->[$j_match_beg];
- my $rhash_end = $rline_hashes->[$j_match_end];
- my @idel;
-
- # Do not delete unless the first token also occurs in a surrounding line
- my $tok0 = $rtokens_match->[0];
- return
- unless (
- (
- $j_match_beg > $jbeg
- && $rnew_lines->[ $j_match_beg - 1 ]->get_rtokens()->[0] eq
- $tok0
- )
- || ( $j_match_end < $jend
- && $rnew_lines->[ $j_match_end + 1 ]->get_rtokens()->[0] eq
- $tok0 )
- );
-
- # Note that we are skipping the token at i=0
- for ( my $i = 1 ; $i <= $imax_match ; $i++ ) {
-
- # do not delete a token which requires padding to align
- next if ( $rneed_pad->[$i] );
-
- my $tok = $rtokens_match->[$i];
-
- # Do not delete a token which occurs in a surrounding line
- next
- if ( $j_match_beg > $jbeg
- && defined( $rline_hashes->[ $j_match_beg - 1 ]->{$tok} ) );
- next
- if ( $j_match_end < $jend
- && defined( $rline_hashes->[ $j_match_end + 1 ]->{$tok} ) );
-
- # ok to delete
- push @idel, $i;
- ##print "ok to delete tok=$tok\n";
- }
- if (@idel) {
- foreach my $j ( $j_match_beg .. $j_match_end ) {
- delete_selected_tokens( $rnew_lines->[$j], \@idel );
- }
- }
- return;
- };
-
- foreach my $item ( @{$rsubgroups} ) {
- ( $jbeg, $jend ) = @{$item};
- my $nlines = $jend - $jbeg + 1;
- next unless ( $nlines > 2 );
-
- for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
- my $line = $rnew_lines->[$jj];
- $rtokens = $line->get_rtokens();
- $rfield_lengths = $line->get_rfield_lengths();
- $imax = @{$rtokens} - 2;
-
- # start a new match group
- if ( $jj == $jbeg ) {
- $start_match->($jj);
- next;
- }
-
- # see if all tokens of this line match the current group
- my $match;
- if ( $imax == $imax_match ) {
- for ( my $i = 0 ; $i <= $imax ; $i++ ) {
- my $tok = $rtokens->[$i];
- my $tok_match = $rtokens_match->[$i];
- last if ( $tok ne $tok_match );
- }
- $match = 1;
- }
-
- # yes, they all match
- if ($match) {
- $add_to_match->($jj);
- }
-
- # now, this line does not match
- else {
- $end_match->();
- $start_match->($jj);
- }
- } # End loopover lines
- $end_match->();
- } # End loop over subgroups
- return;
-} ## end sub delete_null_alignments
-
sub match_line_pairs {
my ( $rlines, $rnew_lines, $rsubgroups, $group_level ) = @_;
# 2 = no match, and lines do not match at all
my ( $tok, $tok_m, $pat, $pat_m, $pad ) = @_;
- my $GoToMsg = "";
- my $return_code = 1;
+ my $GoToMsg = EMPTY_STRING;
+ my $return_code = 0;
my ( $alignment_token, $lev, $tag, $tok_count ) =
decode_alignment_token($tok);
# do not align commas unless they are in named
# containers
- $GoToMsg = "do not align commas in unnamed containers";
- goto NO_MATCH unless ( $tok =~ /[A-Za-z]/ );
+ if ( $tok !~ /[A-Za-z]/ ) {
+ $return_code = 1;
+ $GoToMsg = "do not align commas in unnamed containers";
+ }
+ else {
+ $return_code = 0;
+ }
}
# do not align parens unless patterns match;
# But we can allow a match if the parens don't
# require any padding.
- $GoToMsg = "do not align '(' unless patterns match or pad=0";
- if ( $pad != 0 ) { goto NO_MATCH }
+ if ( $pad != 0 ) {
+ $return_code = 1;
+ $GoToMsg = "do not align '(' unless patterns match or pad=0";
+ }
+ else {
+ $return_code = 0;
+ }
}
# Handle an '=' alignment with different patterns to
# letter of the pattern. This is crude, but works
# well enough.
if ( substr( $pat_m, 0, 1 ) ne substr( $pat, 0, 1 ) ) {
- $GoToMsg = "first character before equals differ";
- goto NO_MATCH;
+ $GoToMsg = "first character before equals differ";
+ $return_code = 1;
}
# The introduction of sub 'prune_alignment_tree'
# left with scalars on the left. We will also prevent
# any partial alignments.
- # set return code 2 if the = is at line level, but
- # set return code 1 if the = is below line level, i.e.
- # sub new { my ( $p, $v ) = @_; bless \$v, $p }
- # sub iter { my ($x) = @_; return undef if $$x < 0; return $$x--; }
+ # set return code 2 if the = is at line level, but
+ # set return code 1 if the = is below line level, i.e.
+ # sub new { my ( $p, $v ) = @_; bless \$v, $p }
+ # sub iter { my ($x) = @_; return undef if $$x < 0; return $$x--; }
elsif (
( index( $pat_m, ',' ) >= 0 ) ne ( index( $pat, ',' ) >= 0 ) )
{
- $GoToMsg = "mixed commas/no-commas before equals";
+ $GoToMsg = "mixed commas/no-commas before equals";
+ $return_code = 1;
if ( $lev eq $group_level ) {
$return_code = 2;
}
- goto NO_MATCH;
+ }
+ else {
+ $return_code = 0;
}
}
-
- MATCH:
- return ( 0, \$GoToMsg );
-
- NO_MATCH:
+ else {
+ $return_code = 0;
+ }
EXPLAIN_COMPARE_PATTERNS
+ && $return_code
&& print STDERR "no match because $GoToMsg\n";
return ( $return_code, \$GoToMsg );
next unless ( $nlines > 1 );
# loop over lines in a subgroup
- for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
+ foreach my $jj ( $jbeg .. $jend ) {
$line_m = $line;
$rtokens_m = $rtokens;
$ci_level_m = $ci_level;
$line = $rnew_lines->[$jj];
- $rtokens = $line->get_rtokens();
- $rpatterns = $line->get_rpatterns();
- $rfield_lengths = $line->get_rfield_lengths();
+ $rtokens = $line->{'rtokens'};
+ $rpatterns = $line->{'rpatterns'};
+ $rfield_lengths = $line->{'rfield_lengths'};
$imax = @{$rtokens} - 2;
- $list_type = $line->get_list_type();
- $ci_level = $line->get_ci_level();
+ $list_type = $line->{'list_type'};
+ $ci_level = $line->{'ci_level'};
# nothing to do for first line
next if ( $jj == $jbeg );
# find number of leading common tokens
- #################################
+ #---------------------------------
# No match to hanging side comment
- #################################
- if ( $line->get_is_hanging_side_comment() ) {
+ #---------------------------------
+ if ( $line->{'is_hanging_side_comment'} ) {
# Should not get here; HSC's have been filtered out
$imax_align = -1;
}
- ##############################
+ #-----------------------------
# Handle comma-separated lists
- ##############################
+ #-----------------------------
elsif ( $list_type && $list_type eq $list_type_m ) {
# do not align lists across a ci jump with new list method
if ($ci_jump) { $imax_min = -1 }
my $i_nomatch = $imax_min + 1;
- for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
+ foreach my $i ( 0 .. $imax_min ) {
my $tok = $rtokens->[$i];
my $tok_m = $rtokens_m->[$i];
if ( $tok ne $tok_m ) {
$imax_align = $i_nomatch - 1;
}
- ##################
+ #-----------------
# Handle non-lists
- ##################
+ #-----------------
else {
my $i_nomatch = $imax_min + 1;
- for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
+ foreach my $i ( 0 .. $imax_min ) {
my $tok = $rtokens->[$i];
my $tok_m = $rtokens_m->[$i];
if ( $tok ne $tok_m ) {
$tok, $tok_m, $pat, $pat_m, $pad
);
if ($match_code) {
- if ( $match_code eq 1 ) { $i_nomatch = $i }
- elsif ( $match_code eq 2 ) { $i_nomatch = 0 }
+ if ( $match_code == 1 ) { $i_nomatch = $i }
+ elsif ( $match_code == 2 ) { $i_nomatch = 0 }
last;
}
}
$imax_align = $i_nomatch - 1;
}
- $line_m->set_imax_pair($imax_align);
+ $line_m->{'imax_pair'} = $imax_align;
} ## end loop over lines
# Put fence at end of subgroup
- $line->set_imax_pair(-1);
+ $line->{'imax_pair'} = -1;
} ## end loop over subgroups
if ( @{$rlines} > @{$rnew_lines} ) {
my $last_pair_info = -1;
foreach my $line ( @{$rlines} ) {
- if ( $line->get_is_hanging_side_comment() ) {
- $line->set_imax_pair($last_pair_info);
+ if ( $line->{'is_hanging_side_comment'} ) {
+ $line->{'imax_pair'} = $last_pair_info;
}
else {
- $last_pair_info = $line->get_imax_pair();
+ $last_pair_info = $line->{'imax_pair'};
}
}
}
# $$d{"hours"} = [ "h", "hr", "hrs", "hour", "hours" ];
my @all_token_info;
my $all_monotonic = 1;
- for ( my $jj = 0 ; $jj < @{$rlines} ; $jj++ ) {
+ foreach my $jj ( 0 .. @{$rlines} - 1 ) {
my ($line) = $rlines->[$jj];
- my $rtokens = $line->get_rtokens();
+ my $rtokens = $line->{'rtokens'};
my $last_lev;
my $is_monotonic = 1;
my $i = -1;
}
my $rline_values = [];
- for ( my $jj = 0 ; $jj < @{$rlines} ; $jj++ ) {
+ foreach my $jj ( 0 .. @{$rlines} - 1 ) {
my ($line) = $rlines->[$jj];
- my $rtokens = $line->get_rtokens();
+ my $rtokens = $line->{'rtokens'};
my $i = -1;
my ( $lev_min, $lev_max );
- my $token_pattern_max = "";
+ my $token_pattern_max = EMPTY_STRING;
my %saw_level;
- my @token_info;
my $is_monotonic = 1;
# find the index of the last token before the side comment
my $tok_end = fat_comma_to_comma( $rtokens->[$imax] );
if ( $all_monotonic && $tok_end =~ /^,/ ) {
- my $i = $imax - 1;
- while ( $i >= 0
- && fat_comma_to_comma( $rtokens->[$i] ) eq $tok_end )
+ my $ii = $imax - 1;
+ while ( $ii >= 0
+ && fat_comma_to_comma( $rtokens->[$ii] ) eq $tok_end )
{
- $imax = $i;
- $i--;
+ $imax = $ii;
+ $ii--;
}
}
$lev_min = -1;
$lev_max = -1;
$levs[0] = -1;
- $rtoken_patterns->{$lev_min} = "";
+ $rtoken_patterns->{$lev_min} = EMPTY_STRING;
$rtoken_indexes->{$lev_min} = [];
}
# debug
0 && do {
- local $" = ')(';
+ local $LIST_SEPARATOR = ')(';
print "lev_min=$lev_min, lev_max=$lev_max, levels=(@levs)\n";
foreach my $key ( sort keys %{$rtoken_patterns} ) {
print "$key => $rtoken_patterns->{$key}\n";
# );
# In the above example, all lines have three commas at the lowest depth
- # (zero), so if there were no other alignements, these lines would all
+ # (zero), so if there were no other alignments, these lines would all
# align considering only the zero depth alignment token. But some lines
# have additional comma alignments at the next depth, so we need to decide
# if we should drop those to keep the top level alignments, or keep those
use constant EXPLAIN_PRUNE => 0;
- ####################################################################
+ #-------------------------------------------------------------------
# Prune Tree Step 1. Start by scanning the lines and collecting info
- ####################################################################
+ #-------------------------------------------------------------------
# Note that the caller had this info but we have to redo this now because
# alignment tokens may have been deleted.
# the patterns and levels of the next line being tested at each depth
my ( @token_patterns_next, @levels_next, @token_indexes_next );
- #########################################################
+ #-----------------------------------------------------------
# define a recursive worker subroutine for tree construction
- #########################################################
+ #-----------------------------------------------------------
# This is a recursive routine which is called if a match condition changes
# at any depth when a new line is encountered. It ends the match node
return;
}; ## end sub end_node
- ######################################################
+ #-----------------------------------------------------
# Prune Tree Step 2. Loop to form the tree of matches.
- ######################################################
- for ( my $jp = 0 ; $jp <= $jmax ; $jp++ ) {
+ #-----------------------------------------------------
+ foreach my $jp ( 0 .. $jmax ) {
# working with two adjacent line indexes, 'm'=minus, 'p'=plus
my $jm = $jp - 1;
$levels_next[$MAX_DEPTH] = $rlevs->[-1];
}
my $depth = 0;
- foreach (@levels_next) {
+ foreach my $item (@levels_next) {
$token_patterns_next[$depth] =
- defined($_) ? $rtoken_patterns->{$_} : undef;
+ defined($item) ? $rtoken_patterns->{$item} : undef;
$token_indexes_next[$depth] =
- defined($_) ? $rtoken_indexes->{$_} : undef;
+ defined($item) ? $rtoken_indexes->{$item} : undef;
$depth++;
}
}
# End groups if a hard flag has been set
- elsif ( $rlines->[$jm]->get_end_group() ) {
+ elsif ( $rlines->[$jm]->{'end_group'} ) {
my $n_parent;
$end_node->( 0, $jm, $n_parent );
}
# Continue at hanging side comment
- elsif ( $rlines->[$jp]->get_is_hanging_side_comment() ) {
+ elsif ( $rlines->[$jp]->{'is_hanging_side_comment'} ) {
next;
}
}
} ## end loop to form tree of matches
- ##########################################################
+ #---------------------------------------------------------
# Prune Tree Step 3. Make links from parent to child nodes
- ##########################################################
+ #---------------------------------------------------------
# It seemed cleaner to do this as a separate step rather than during tree
# construction. The children nodes have links up to the parent node which
# created them. Now make links in the opposite direction, so the parents
# can find the children. We store the range of children nodes ($nc_beg,
- # $nc_end) of each parent with two additional indexes in the orignal array.
+ # $nc_end) of each parent with two additional indexes in the original array.
# These will be undef if no children.
- for ( my $depth = $MAX_DEPTH ; $depth > 0 ; $depth-- ) {
+ foreach my $depth ( reverse( 1 .. $MAX_DEPTH ) ) {
next unless defined( $match_tree[$depth] );
my $nc_max = @{ $match_tree[$depth] } - 1;
my $np_now;
}
};
- #######################################################
+ #------------------------------------------------------
# Prune Tree Step 4. Make a list of nodes to be deleted
- #######################################################
+ #------------------------------------------------------
# list of lines with tokens to be deleted:
# [$jbeg, $jend, $level_keep]
# $level_keep is the minimum level to keep
my @delete_list;
+ # Not currently used:
# Groups with ending comma lists and their range of sizes:
# $ragged_comma_group{$id} = [ imax_group_min, imax_group_max ]
- my %ragged_comma_group;
+ ## my %ragged_comma_group;
# Define a threshold line count for forcing a break
my $nlines_break = 3;
@todo_list = ( 0 .. @{ $match_tree[0] } - 1 );
}
- for ( my $depth = 0 ; $depth <= $MAX_DEPTH ; $depth++ ) {
+ foreach my $depth ( 0 .. $MAX_DEPTH ) {
last unless (@todo_list);
my @todo_next;
foreach my $np (@todo_list) {
@todo_list = @todo_next;
} ## end loop to mark nodes to delete
- #############################################################
+ #------------------------------------------------------------
# Prune Tree Step 5. Loop to delete selected alignment tokens
- #############################################################
+ #------------------------------------------------------------
foreach my $item (@delete_list) {
my ( $jbeg, $jend, $level_keep ) = @{$item};
foreach my $jj ( $jbeg .. $jend ) {
my $line = $rlines->[$jj];
my @idel;
- my $rtokens = $line->get_rtokens();
+ my $rtokens = $line->{'rtokens'};
my $imax = @{$rtokens} - 2;
- for ( my $i = 0 ; $i <= $imax ; $i++ ) {
+ foreach my $i ( 0 .. $imax ) {
my $tok = $rtokens->[$i];
my ( $raw_tok, $lev, $tag, $tok_count ) =
decode_alignment_token($tok);
sub Dump_tree_groups {
my ( $rgroup, $msg ) = @_;
print "$msg\n";
- local $" = ')(';
+ local $LIST_SEPARATOR = ')(';
foreach my $item ( @{$rgroup} ) {
my @fix = @{$item};
- foreach (@fix) { $_ = "undef" unless defined $_; }
+ foreach my $val (@fix) { $val = "undef" unless defined $val; }
$fix[4] = "...";
print "(@fix)\n";
}
my $is_marginal = 0;
- # always keep alignments of a terminal else or ternary
- goto RETURN if ( defined( $line_1->get_j_terminal_match() ) );
+ #---------------------------------------
+ # Always align certain special cases ...
+ #---------------------------------------
+ if (
- # always align lists
- my $group_list_type = $line_0->get_list_type();
- goto RETURN if ($group_list_type);
+ # always keep alignments of a terminal else or ternary
+ defined( $line_1->{'j_terminal_match'} )
- # always align hanging side comments
- my $is_hanging_side_comment = $line_1->get_is_hanging_side_comment();
- goto RETURN if ($is_hanging_side_comment);
+ # always align lists
+ || $line_0->{'list_type'}
- my $jmax_0 = $line_0->get_jmax();
- my $jmax_1 = $line_1->get_jmax();
- my $rtokens_1 = $line_1->get_rtokens();
- my $rtokens_0 = $line_0->get_rtokens();
- my $rfield_lengths_0 = $line_0->get_rfield_lengths();
- my $rfield_lengths_1 = $line_1->get_rfield_lengths();
- my $rpatterns_0 = $line_0->get_rpatterns();
- my $rpatterns_1 = $line_1->get_rpatterns();
- my $imax_next = $line_1->get_imax_pair();
+ # always align hanging side comments
+ || $line_1->{'is_hanging_side_comment'}
+
+ )
+ {
+ return ( $is_marginal, $imax_align );
+ }
+
+ my $jmax_0 = $line_0->{'jmax'};
+ my $jmax_1 = $line_1->{'jmax'};
+ my $rtokens_1 = $line_1->{'rtokens'};
+ my $rtokens_0 = $line_0->{'rtokens'};
+ my $rfield_lengths_0 = $line_0->{'rfield_lengths'};
+ my $rfield_lengths_1 = $line_1->{'rfield_lengths'};
+ my $rpatterns_0 = $line_0->{'rpatterns'};
+ my $rpatterns_1 = $line_1->{'rpatterns'};
+ my $imax_next = $line_1->{'imax_pair'};
# We will scan the alignment tokens and set a flag '$is_marginal' if
# it seems that the an alignment would look bad.
my $max_pad = 0;
my $saw_good_alignment = 0;
- my $saw_if_or; # if we saw an 'if' or 'or' at group level
- my $raw_tokb = ""; # first token seen at group level
+ my $saw_if_or; # if we saw an 'if' or 'or' at group level
+ my $raw_tokb = EMPTY_STRING; # first token seen at group level
my $jfirst_bad;
my $line_ending_fat_comma; # is last token just a '=>' ?
my $j0_eq_pad;
my $j0_max_pad = 0;
- for ( my $j = 0 ; $j < $jmax_1 - 1 ; $j++ ) {
+ foreach my $j ( 0 .. $jmax_1 - 2 ) {
my ( $raw_tok, $lev, $tag, $tok_count ) =
decode_alignment_token( $rtokens_1->[$j] );
if ( $raw_tok && $lev == $group_level ) {
my $pad = $rfield_lengths_1->[$j] - $rfield_lengths_0->[$j];
if ( $j == 0 ) {
- $pad += $line_1->get_leading_space_count() -
- $line_0->get_leading_space_count();
+ $pad += $line_1->{'leading_space_count'} -
+ $line_0->{'leading_space_count'};
# Remember the pad at a leading equals
if ( $raw_tok eq '=' && $lev == $group_level ) {
&& $jmax_1 == 2
&& $sc_term0 ne $sc_term1;
- ########################################
- # return unless this is a marginal match
- ########################################
- goto RETURN if ( !$is_marginal );
+ #---------------------------------------
+ # return if this is not a marginal match
+ #---------------------------------------
+ if ( !$is_marginal ) {
+ return ( $is_marginal, $imax_align );
+ }
# Undo the marginal match flag in certain cases,
my $pat0 = $rpatterns_0->[0];
my $pat1 = $rpatterns_1->[0];
- ##########################################################
+ #---------------------------------------------------------
# Turn off the marginal flag for some types of assignments
- ##########################################################
+ #---------------------------------------------------------
if ( $is_assignment{$raw_tokb} ) {
# undo marginal flag if first line is semicolon terminated
}
}
- ######################################################
+ #-----------------------------------------------------
# Turn off the marginal flag if we saw an 'if' or 'or'
- ######################################################
+ #-----------------------------------------------------
# A trailing 'if' and 'or' often gives a good alignment
# For example, we can align these:
$imax_align = $jfirst_bad - 1;
}
- ###########################################################
+ #----------------------------------------------------------
# Allow sweep to match lines with leading '=' in some cases
- ###########################################################
+ #----------------------------------------------------------
if ( $imax_align < 0 && defined($j0_eq_pad) ) {
if (
}
}
- RETURN:
return ( $is_marginal, $imax_align );
}
-}
+} ## end closure for sub is_marginal_match
sub get_extra_leading_spaces {
return 0 unless ( @{$rlines} && @{$rgroups} );
- my $object = $rlines->[0]->get_indentation();
+ my $object = $rlines->[0]->{'indentation'};
return 0 unless ( ref($object) );
my $extra_leading_spaces = 0;
my $extra_indentation_spaces_wanted = get_recoverable_spaces($object);
next if ( $j == 0 );
# all indentation objects must be the same
- if ( $object != $rlines->[$j]->get_indentation() ) {
+ if ( $object != $rlines->[$j]->{'indentation'} ) {
return 0;
}
}
? $extra_indentation_spaces_wanted
: $avail;
- #########################################################
+ #--------------------------------------------------------
# Note: min spaces can be negative; for example with -gnu
# f(
# do { 1; !!(my $x = bless []); }
# );
- #########################################################
+ #--------------------------------------------------------
# The following rule is needed to match older formatting:
# For multiple groups, we will keep spaces non-negative.
# For a single group, we will allow a negative space.
# a previous side comment should be forgotten. This involves
# checking several rules.
- # Return true to keep old comment location
- # Return false to forget old comment location
+ # Return true to KEEP old comment location
+ # Return false to FORGET old comment location
+ my $KEEP = 1;
+ my $FORGET = 0;
- my $rfields = $line->get_rfields();
- my $is_hanging_side_comment = $line->get_is_hanging_side_comment();
+ my $rfields = $line->{'rfields'};
+ my $is_hanging_side_comment = $line->{'is_hanging_side_comment'};
# RULE1: Never forget comment before a hanging side comment
- goto KEEP if ($is_hanging_side_comment);
+ return $KEEP if ($is_hanging_side_comment);
# RULE2: Forget a side comment after a short line difference,
# where 'short line difference' is computed from a formula.
my $short_diff = SC_LONG_LINE_DIFF / ( 1 + $alev_diff * $num5 );
- goto FORGET
+ return $FORGET
if ( $line_diff > $short_diff
|| !$self->[_rOpts_valign_side_comments_] );
# RULE3: Forget a side comment if this line is at lower level and
# ends a block
my $last_sc_level = $self->[_last_side_comment_level_];
- goto FORGET
+ return $FORGET
if ( $level < $last_sc_level
&& $is_closing_block_type{ substr( $rfields->[0], 0, 1 ) } );
# [0, 3, 6], [1, 4, 7], [2, 5, 8], # columns
# [0, 4, 8], [2, 4, 6]
# ) # diagonals
- goto FORGET
+ return $FORGET
if ( $cached_line_type == 2 || $cached_line_type == 4 );
}
# Otherwise, keep it alive
- goto KEEP;
-
- FORGET:
- return 0;
-
- KEEP:
- return 1;
+ return $KEEP;
}
sub align_side_comments {
# and fake side comments. This has the consequence that the lengths of
# long lines without real side comments can cause 'push' all side comments
# to the right. This seems unusual, but testing with and without this
- # feature shows that it is usually better this way. Othewise, side
+ # feature shows that it is usually better this way. Otherwise, side
# comments can be hidden between long lines without side comments and
# thus be harder to read.
my ( $jbeg, $jend ) = @{$item};
foreach my $j ( $jbeg .. $jend ) {
my $line = $rlines->[$j];
- my $jmax = $line->get_jmax();
- if ( $line->get_rfield_lengths()->[$jmax] ) {
+ my $jmax = $line->{'jmax'};
+ if ( $line->{'rfield_lengths'}->[$jmax] ) {
# this group has a line with a side comment
push @todo, $ng;
# Count $num5 = number of comments in the 5 lines after the first comment
# This is an important factor in a decision formula
my $num5 = 1;
- for ( my $jj = $j_sc_beg + 1 ; $jj < @{$rlines} ; $jj++ ) {
+ foreach my $jj ( $j_sc_beg + 1 .. @{$rlines} - 1 ) {
my $ldiff = $jj - $j_sc_beg;
last if ( $ldiff > 5 );
my $line = $rlines->[$jj];
- my $jmax = $line->get_jmax();
- my $sc_len = $line->get_rfield_lengths()->[$jmax];
+ my $jmax = $line->{'jmax'};
+ my $sc_len = $line->{'rfield_lengths'}->[$jmax];
next unless ($sc_len);
$num5++;
}
# Forget the old side comment location if necessary
- my $line = $rlines->[$j_sc_beg];
+ my $line_0 = $rlines->[$j_sc_beg];
my $lnum =
$j_sc_beg + $self->[_file_writer_object_]->get_output_line_number();
my $keep_it =
- $self->is_good_side_comment_column( $line, $lnum, $group_level, $num5 );
+ $self->is_good_side_comment_column( $line_0, $lnum, $group_level, $num5 );
my $last_side_comment_column =
$keep_it ? $self->[_last_side_comment_column_] : 0;
# Loop over passes
my $max_comment_column = $last_side_comment_column;
- for ( my $PASS = 1 ; $PASS <= $MAX_PASS ; $PASS++ ) {
+ foreach my $PASS ( 1 .. $MAX_PASS ) {
# If there are two passes, then on the last pass make the old column
# equal to the largest of the group. This will result in the comments
# Note that since all lines in a group have common alignments, we
# just have to work on one of the lines (the first line).
my $line = $rlines->[$jbeg];
- my $jmax = $line->get_jmax();
- my $is_hanging_side_comment = $line->get_is_hanging_side_comment();
+ my $jmax = $line->{'jmax'};
+ my $is_hanging_side_comment = $line->{'is_hanging_side_comment'};
last
if ( $PASS < $MAX_PASS && $is_hanging_side_comment );
my $j_sc_last;
my $ng_last = $todo[-1];
my ( $jbeg, $jend ) = @{ $rgroups->[$ng_last] };
- for ( my $jj = $jend ; $jj >= $jbeg ; $jj-- ) {
+ foreach my $jj ( reverse( $jbeg .. $jend ) ) {
my $line = $rlines->[$jj];
- my $jmax = $line->get_jmax();
- if ( $line->get_rfield_lengths()->[$jmax] ) {
+ my $jmax = $line->{'jmax'};
+ if ( $line->{'rfield_lengths'}->[$jmax] ) {
$j_sc_last = $jj;
last;
}
sub valign_output_step_A {
- ###############################################################
+ #------------------------------------------------------------
# This is Step A in writing vertically aligned lines.
# The line is prepared according to the alignments which have
# been found. Then it is shipped to the next step.
- ###############################################################
+ #------------------------------------------------------------
my ( $self, $rinput_hash ) = @_;
my $level = $rinput_hash->{level};
my $maximum_line_length = $rinput_hash->{maximum_line_length};
- my $rfields = $line->get_rfields();
- my $rfield_lengths = $line->get_rfield_lengths();
- my $leading_space_count = $line->get_leading_space_count();
- my $outdent_long_lines = $line->get_outdent_long_lines();
- my $maximum_field_index = $line->get_jmax();
- my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
- my $Kend = $line->get_Kend();
- my $level_end = $line->get_level_end();
+ my $rfields = $line->{'rfields'};
+ my $rfield_lengths = $line->{'rfield_lengths'};
+ my $leading_space_count = $line->{'leading_space_count'};
+ my $outdent_long_lines = $line->{'outdent_long_lines'};
+ my $maximum_field_index = $line->{'jmax'};
+ my $rvertical_tightness_flags = $line->{'rvertical_tightness_flags'};
+ my $Kend = $line->{'Kend'};
+ my $level_end = $line->{'level_end'};
+
+ # Check for valid hash keys at end of lifetime of $line during development
+ DEVEL_MODE
+ && check_keys( $line, \%valid_LINE_keys,
+ "Checking line keys at valign_output_step_A", 1 );
# add any extra spaces
if ( $leading_space_count > $group_leader_length ) {
my $str = $rfields->[0];
my $str_len = $rfield_lengths->[0];
+ my @alignments = @{ $line->{'ralignments'} };
+ if ( @alignments != $maximum_field_index + 1 ) {
+
+ # Shouldn't happen: sub install_new_alignments makes jmax alignments
+ my $jmax_alignments = @alignments - 1;
+ if (DEVEL_MODE) {
+ Fault(
+"alignment jmax=$jmax_alignments should equal $maximum_field_index\n"
+ );
+ }
+ $do_not_align = 1;
+ }
+
# loop to concatenate all fields of this line and needed padding
my $total_pad_count = 0;
for my $j ( 1 .. $maximum_field_index ) {
);
# compute spaces of padding before this field
- my $col = $line->get_column( $j - 1 );
+ my $col = $alignments[ $j - 1 ]->{'column'};
my $pad = $col - ( $str_len + $leading_space_count );
if ($do_not_align) {
# only add padding when we have a finite field;
# this avoids extra terminal spaces if we have empty fields
if ( $rfield_lengths->[$j] > 0 ) {
- $str .= ' ' x $total_pad_count;
+ $str .= SPACE x $total_pad_count;
$str_len += $total_pad_count;
$total_pad_count = 0;
$str .= $rfields->[$j];
if ( !defined($imax_align) ) { $imax_align = -1 }
# First delete the unwanted tokens
- my $jmax_old = $line_0->get_jmax();
- my @old_alignments = $line_0->get_alignments();
- my @idel = ( $imax_align + 1 .. $jmax_old - 2 );
-
+ my $jmax_old = $line_0->{'jmax'};
+ my @idel = ( $imax_align + 1 .. $jmax_old - 2 );
return unless (@idel);
+ # Get old alignments before any changes are made
+ my @old_alignments = @{ $line_0->{'ralignments'} };
+
foreach my $line ( $line_0, $line_1 ) {
delete_selected_tokens( $line, \@idel );
}
@old_alignments[ 0 .. $imax_align ];
}
- my $jmax_new = $line_0->get_jmax();
+ my $jmax_new = $line_0->{'jmax'};
$new_alignments[ $jmax_new - 1 ] = $old_alignments[ $jmax_old - 1 ];
- $new_alignments[$jmax_new] = $old_alignments[$jmax_old];
- $line_0->set_alignments(@new_alignments);
- $line_1->set_alignments(@new_alignments);
+ $new_alignments[$jmax_new] = $old_alignments[$jmax_old];
+ $line_0->{'ralignments'} = \@new_alignments;
+ $line_1->{'ralignments'} = \@new_alignments;
return;
}
my $cached_seqno_string;
my $cached_line_Kend;
my $cached_line_maximum_length;
+
+ # These are passed to step_C:
my $seqno_string;
my $last_nonblank_seqno_string;
- sub get_seqno_string {
- return $seqno_string;
- }
-
- sub get_last_nonblank_seqno_string {
- return $last_nonblank_seqno_string;
- }
-
sub set_last_nonblank_seqno_string {
my ($val) = @_;
$last_nonblank_seqno_string = $val;
sub initialize_step_B_cache {
# valign_output_step_B cache:
- $cached_line_text = "";
+ $cached_line_text = EMPTY_STRING;
$cached_line_text_length = 0;
$cached_line_type = 0;
$cached_line_opening_flag = 0;
$cached_seqno = 0;
$cached_line_valid = 0;
$cached_line_leading_space_count = 0;
- $cached_seqno_string = "";
+ $cached_seqno_string = EMPTY_STRING;
$cached_line_Kend = undef;
$cached_line_maximum_length = undef;
# These vars hold a string of sequence numbers joined together used by
# the cache
- $seqno_string = "";
- $last_nonblank_seqno_string = "";
+ $seqno_string = EMPTY_STRING;
+ $last_nonblank_seqno_string = EMPTY_STRING;
return;
}
if ($cached_line_type) {
$seqno_string = $cached_seqno_string;
$self->valign_output_step_C(
+ $seqno_string,
+ $last_nonblank_seqno_string,
+
$cached_line_text,
$cached_line_leading_space_count,
$self->[_last_level_written_],
$cached_line_Kend,
);
$cached_line_type = 0;
- $cached_line_text = "";
+ $cached_line_text = EMPTY_STRING;
$cached_line_text_length = 0;
- $cached_seqno_string = "";
+ $cached_seqno_string = EMPTY_STRING;
$cached_line_Kend = undef;
$cached_line_maximum_length = undef;
}
return;
}
+ sub handle_cached_line {
+
+ my ( $self, $rinput, $leading_string, $leading_string_length ) = @_;
+
+ # The cached line will either be:
+ # - written out, or
+ # - or combined with the current line
+
+ my $last_level_written = $self->[_last_level_written_];
+
+ my $leading_space_count = $rinput->{leading_space_count};
+ my $str = $rinput->{line};
+ my $str_length = $rinput->{line_length};
+ my $rvertical_tightness_flags = $rinput->{rvertical_tightness_flags};
+ my $level = $rinput->{level};
+ my $level_end = $rinput->{level_end};
+ my $maximum_line_length = $rinput->{maximum_line_length};
+
+ my ( $open_or_close, $opening_flag, $closing_flag, $seqno, $valid,
+ $seqno_beg, $seqno_end );
+ if ($rvertical_tightness_flags) {
+
+ $open_or_close = $rvertical_tightness_flags->{_vt_type};
+ $seqno_beg = $rvertical_tightness_flags->{_vt_seqno_beg};
+ }
+
+ # Dump an invalid cached line
+ if ( !$cached_line_valid ) {
+ $self->valign_output_step_C(
+ $seqno_string,
+ $last_nonblank_seqno_string,
+
+ $cached_line_text,
+ $cached_line_leading_space_count,
+ $last_level_written,
+ $cached_line_Kend,
+ );
+ }
+
+ # Handle cached line ending in OPENING tokens
+ elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
+
+ my $gap = $leading_space_count - $cached_line_text_length;
+
+ # handle option of just one tight opening per line:
+ if ( $cached_line_opening_flag == 1 ) {
+ if ( defined($open_or_close) && $open_or_close == 1 ) {
+ $gap = -1;
+ }
+ }
+
+ # Do not join the lines if this might produce a one-line
+ # container which exceeds the maximum line length. This is
+ # necessary prevent blinking, particularly with the combination
+ # -xci -pvt=2. In that case a one-line block alternately forms
+ # and breaks, causing -xci to alternately turn on and off (case
+ # b765).
+ # Patched to fix cases b656 b862 b971 b972: always do the check
+ # if the maximum line length changes (due to -vmll).
+ if (
+ $gap >= 0
+ && ( $maximum_line_length != $cached_line_maximum_length
+ || ( defined($level_end) && $level > $level_end ) )
+ )
+ {
+ my $test_line_length =
+ $cached_line_text_length + $gap + $str_length;
+
+ # Add a small tolerance in the length test (fixes case b862)
+ if ( $test_line_length > $cached_line_maximum_length - 2 ) {
+ $gap = -1;
+ }
+ }
+
+ if ( $gap >= 0 && defined($seqno_beg) ) {
+ $maximum_line_length = $cached_line_maximum_length;
+ $leading_string = $cached_line_text . SPACE x $gap;
+ $leading_string_length = $cached_line_text_length + $gap;
+ $leading_space_count = $cached_line_leading_space_count;
+ $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
+ $level = $last_level_written;
+ }
+ else {
+ $self->valign_output_step_C(
+ $seqno_string,
+ $last_nonblank_seqno_string,
+
+ $cached_line_text,
+ $cached_line_leading_space_count,
+ $last_level_written,
+ $cached_line_Kend,
+ );
+ }
+ }
+
+ # Handle cached line ending in CLOSING tokens
+ else {
+ my $test_line =
+ $cached_line_text . SPACE x $cached_line_closing_flag . $str;
+ my $test_line_length =
+ $cached_line_text_length +
+ $cached_line_closing_flag +
+ $str_length;
+ if (
+
+ # The new line must start with container
+ $seqno_beg
+
+ # The container combination must be okay..
+ && (
+
+ # okay to combine like types
+ ( $open_or_close == $cached_line_type )
+
+ # closing block brace may append to non-block
+ || ( $cached_line_type == 2 && $open_or_close == 4 )
+
+ # something like ');'
+ || ( !$open_or_close && $cached_line_type == 2 )
+
+ )
+
+ # The combined line must fit
+ && ( $test_line_length <= $cached_line_maximum_length )
+ )
+ {
+
+ $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
+
+ # Patch to outdent closing tokens ending # in ');' If we
+ # are joining a line like ');' to a previous stacked set of
+ # closing tokens, then decide if we may outdent the
+ # combined stack to the indentation of the ');'. Since we
+ # should not normally outdent any of the other tokens more
+ # than the indentation of the lines that contained them, we
+ # will only do this if all of the corresponding opening
+ # tokens were on the same line. This can happen with -sot
+ # and -sct.
+
+ # For example, it is ok here:
+ # __PACKAGE__->load_components( qw(
+ # PK::Auto
+ # Core
+ # ));
+ #
+ # But, for example, we do not outdent in this example
+ # because that would put the closing sub brace out farther
+ # than the opening sub brace:
+ #
+ # perltidy -sot -sct
+ # $c->Tk::bind(
+ # '<Control-f>' => sub {
+ # my ($c) = @_;
+ # my $e = $c->XEvent;
+ # itemsUnderArea $c;
+ # } );
+ #
+ if ( $str =~ /^\);/
+ && $cached_line_text =~ /^[\)\}\]\s]*$/ )
+ {
+
+ # The way to tell this is if the stacked sequence
+ # numbers of this output line are the reverse of the
+ # stacked sequence numbers of the previous non-blank
+ # line of sequence numbers. So we can join if the
+ # previous nonblank string of tokens is the mirror
+ # image. For example if stack )}] is 13:8:6 then we
+ # are looking for a leading stack like [{( which
+ # is 6:8:13. We only need to check the two ends,
+ # because the intermediate tokens must fall in order.
+ # Note on speed: having to split on colons and
+ # eliminate multiple colons might appear to be slow,
+ # but it's not an issue because we almost never come
+ # through here. In a typical file we don't.
+
+ $seqno_string =~ s/^:+//;
+ $last_nonblank_seqno_string =~ s/^:+//;
+ $seqno_string =~ s/:+/:/g;
+ $last_nonblank_seqno_string =~ s/:+/:/g;
+
+ # how many spaces can we outdent?
+ my $diff =
+ $cached_line_leading_space_count - $leading_space_count;
+ if ( $diff > 0
+ && length($seqno_string)
+ && length($last_nonblank_seqno_string) ==
+ length($seqno_string) )
+ {
+ my @seqno_last =
+ ( split /:/, $last_nonblank_seqno_string );
+ my @seqno_now = ( split /:/, $seqno_string );
+ if ( @seqno_now
+ && @seqno_last
+ && $seqno_now[-1] == $seqno_last[0]
+ && $seqno_now[0] == $seqno_last[-1] )
+ {
+
+ # OK to outdent ..
+ # for absolute safety, be sure we only remove
+ # whitespace
+ my $ws = substr( $test_line, 0, $diff );
+ if ( ( length($ws) == $diff )
+ && $ws =~ /^\s+$/ )
+ {
+
+ $test_line = substr( $test_line, $diff );
+ $cached_line_leading_space_count -= $diff;
+ $last_level_written =
+ $self->level_change(
+ $cached_line_leading_space_count,
+ $diff, $last_level_written );
+ $self->reduce_valign_buffer_indentation($diff);
+ }
+
+ # shouldn't happen, but not critical:
+ ##else {
+ ## ERROR transferring indentation here
+ ##}
+ }
+ }
+ }
+
+ # Change the args to look like we received the combined line
+ $str = $test_line;
+ $str_length = $test_line_length;
+ $leading_string = EMPTY_STRING;
+ $leading_string_length = 0;
+ $leading_space_count = $cached_line_leading_space_count;
+ $level = $last_level_written;
+ $maximum_line_length = $cached_line_maximum_length;
+ }
+ else {
+ $self->valign_output_step_C(
+ $seqno_string,
+ $last_nonblank_seqno_string,
+
+ $cached_line_text,
+ $cached_line_leading_space_count,
+ $last_level_written,
+ $cached_line_Kend,
+ );
+ }
+ }
+ return ( $str, $str_length, $leading_string, $leading_string_length,
+ $leading_space_count, $level, $maximum_line_length, );
+
+ } ## end sub handle_cached_line
+
sub valign_output_step_B {
- ###############################################################
+ #---------------------------------------------------------
# This is Step B in writing vertically aligned lines.
# Vertical tightness is applied according to preset flags.
# In particular this routine handles stacking of opening
# and closing tokens.
- ###############################################################
+ #---------------------------------------------------------
my ( $self, $rinput ) = @_;
my $Kend = $rinput->{Kend};
my $maximum_line_length = $rinput->{maximum_line_length};
- my $last_level_written = $self->[_last_level_written_];
-
# Useful -gcs test cases for wide characters are
# perl527/(method.t.2, reg_mesg.t, mime-header.t)
# later by entabbing, so we have to keep track of any changes
# to the leading_space_count from here on.
my $leading_string =
- $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
+ $leading_space_count > 0
+ ? ( SPACE x $leading_space_count )
+ : EMPTY_STRING;
my $leading_string_length = length($leading_string);
# Unpack any recombination data; it was packed by
# would be a disaster.
if ( length($cached_line_text) ) {
- # Dump an invalid cached line
- if ( !$cached_line_valid ) {
- $self->valign_output_step_C(
- $cached_line_text, $cached_line_leading_space_count,
- $last_level_written, $cached_line_Kend
- );
- }
-
- # Handle cached line ending in OPENING tokens
- elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
-
- my $gap = $leading_space_count - $cached_line_text_length;
-
- # handle option of just one tight opening per line:
- if ( $cached_line_opening_flag == 1 ) {
- if ( defined($open_or_close) && $open_or_close == 1 ) {
- $gap = -1;
- }
- }
-
- # Do not join the lines if this might produce a one-line
- # container which exceeds the maximum line length. This is
- # necessary prevent blinking, particularly with the combination
- # -xci -pvt=2. In that case a one-line block alternately forms
- # and breaks, causing -xci to alternately turn on and off (case
- # b765).
- # Patched to fix cases b656 b862 b971 b972: always do the check
- # if the maximum line length changes (due to -vmll).
- if (
- $gap >= 0
- && ( $maximum_line_length != $cached_line_maximum_length
- || ( defined($level_end) && $level > $level_end ) )
- )
- {
- my $test_line_length =
- $cached_line_text_length + $gap + $str_length;
-
- # Add a small tolerance in the length test (fixes case b862)
- if ( $test_line_length > $cached_line_maximum_length - 2 ) {
- $gap = -1;
- }
- }
-
- if ( $gap >= 0 && defined($seqno_beg) ) {
- $maximum_line_length = $cached_line_maximum_length;
- $leading_string = $cached_line_text . ' ' x $gap;
- $leading_string_length = $cached_line_text_length + $gap;
- $leading_space_count = $cached_line_leading_space_count;
- $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
- $level = $last_level_written;
- }
- else {
- $self->valign_output_step_C(
- $cached_line_text, $cached_line_leading_space_count,
- $last_level_written, $cached_line_Kend
- );
- }
- }
-
- # Handle cached line ending in CLOSING tokens
- else {
- my $test_line =
- $cached_line_text . ' ' x $cached_line_closing_flag . $str;
- my $test_line_length =
- $cached_line_text_length +
- $cached_line_closing_flag +
- $str_length;
- if (
-
- # The new line must start with container
- $seqno_beg
-
- # The container combination must be okay..
- && (
-
- # okay to combine like types
- ( $open_or_close == $cached_line_type )
-
- # closing block brace may append to non-block
- || ( $cached_line_type == 2 && $open_or_close == 4 )
-
- # something like ');'
- || ( !$open_or_close && $cached_line_type == 2 )
-
- )
-
- # The combined line must fit
- && ( $test_line_length <= $cached_line_maximum_length )
- )
- {
-
- $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
-
- # Patch to outdent closing tokens ending # in ');' If we
- # are joining a line like ');' to a previous stacked set of
- # closing tokens, then decide if we may outdent the
- # combined stack to the indentation of the ');'. Since we
- # should not normally outdent any of the other tokens more
- # than the indentation of the lines that contained them, we
- # will only do this if all of the corresponding opening
- # tokens were on the same line. This can happen with -sot
- # and -sct.
-
- # For example, it is ok here:
- # __PACKAGE__->load_components( qw(
- # PK::Auto
- # Core
- # ));
- #
- # But, for example, we do not outdent in this example
- # because that would put the closing sub brace out farther
- # than the opening sub brace:
- #
- # perltidy -sot -sct
- # $c->Tk::bind(
- # '<Control-f>' => sub {
- # my ($c) = @_;
- # my $e = $c->XEvent;
- # itemsUnderArea $c;
- # } );
- #
- if ( $str =~ /^\);/
- && $cached_line_text =~ /^[\)\}\]\s]*$/ )
- {
+ (
+ $str,
+ $str_length,
+ $leading_string,
+ $leading_string_length,
+ $leading_space_count,
+ $level,
+ $maximum_line_length
- # The way to tell this is if the stacked sequence
- # numbers of this output line are the reverse of the
- # stacked sequence numbers of the previous non-blank
- # line of sequence numbers. So we can join if the
- # previous nonblank string of tokens is the mirror
- # image. For example if stack )}] is 13:8:6 then we
- # are looking for a leading stack like [{( which
- # is 6:8:13. We only need to check the two ends,
- # because the intermediate tokens must fall in order.
- # Note on speed: having to split on colons and
- # eliminate multiple colons might appear to be slow,
- # but it's not an issue because we almost never come
- # through here. In a typical file we don't.
-
- $seqno_string =~ s/^:+//;
- $last_nonblank_seqno_string =~ s/^:+//;
- $seqno_string =~ s/:+/:/g;
- $last_nonblank_seqno_string =~ s/:+/:/g;
-
- # how many spaces can we outdent?
- my $diff =
- $cached_line_leading_space_count -
- $leading_space_count;
- if ( $diff > 0
- && length($seqno_string)
- && length($last_nonblank_seqno_string) ==
- length($seqno_string) )
- {
- my @seqno_last =
- ( split /:/, $last_nonblank_seqno_string );
- my @seqno_now = ( split /:/, $seqno_string );
- if ( @seqno_now
- && @seqno_last
- && $seqno_now[-1] == $seqno_last[0]
- && $seqno_now[0] == $seqno_last[-1] )
- {
+ ) = $self->handle_cached_line( $rinput, $leading_string,
+ $leading_string_length );
- # OK to outdent ..
- # for absolute safety, be sure we only remove
- # whitespace
- my $ws = substr( $test_line, 0, $diff );
- if ( ( length($ws) == $diff )
- && $ws =~ /^\s+$/ )
- {
-
- $test_line = substr( $test_line, $diff );
- $cached_line_leading_space_count -= $diff;
- $last_level_written =
- $self->level_change(
- $cached_line_leading_space_count,
- $diff, $last_level_written );
- $self->reduce_valign_buffer_indentation(
- $diff);
- }
-
- # shouldn't happen, but not critical:
- ##else {
- ## ERROR transferring indentation here
- ##}
- }
- }
- }
+ $cached_line_type = 0;
+ $cached_line_text = EMPTY_STRING;
+ $cached_line_text_length = 0;
+ $cached_line_Kend = undef;
+ $cached_line_maximum_length = undef;
- # Change the args to look like we received the combined line
- $str = $test_line;
- $str_length = $test_line_length;
- $leading_string = "";
- $leading_string_length = 0;
- $leading_space_count = $cached_line_leading_space_count;
- $level = $last_level_written;
- $maximum_line_length = $cached_line_maximum_length;
- }
- else {
- $self->valign_output_step_C(
- $cached_line_text, $cached_line_leading_space_count,
- $last_level_written, $cached_line_Kend
- );
- }
- }
}
- $cached_line_type = 0;
- $cached_line_text = "";
- $cached_line_text_length = 0;
- $cached_line_Kend = undef;
- $cached_line_maximum_length = undef;
# make the line to be written
my $line = $leading_string . $str;
# write or cache this line ...
# fix for case b999: do not cache an outdented line
- if ( !$open_or_close || $side_comment_length > 0 || $is_outdented_line )
+ # fix for b1378: do not cache an empty line
+ if ( !$open_or_close
+ || $side_comment_length > 0
+ || $is_outdented_line
+ || !$line_length )
{
- $self->valign_output_step_C( $line, $leading_space_count, $level,
- $Kend );
+ $self->valign_output_step_C(
+ $seqno_string,
+ $last_nonblank_seqno_string,
+
+ $line,
+ $leading_space_count,
+ $level,
+ $Kend,
+ );
}
else {
$cached_line_text = $line;
sub initialize_valign_buffer {
@valign_buffer = ();
- $valign_buffer_filling = "";
+ $valign_buffer_filling = EMPTY_STRING;
return;
}
}
@valign_buffer = ();
}
- $valign_buffer_filling = "";
+ $valign_buffer_filling = EMPTY_STRING;
return;
}
sub valign_output_step_C {
- ###############################################################
+ #-----------------------------------------------------------------------
# This is Step C in writing vertically aligned lines.
# Lines are either stored in a buffer or passed along to the next step.
# The reason for storing lines is that we may later want to reduce their
# indentation when -sot and -sct are both used.
- ###############################################################
- my ( $self, @args ) = @_;
+ #-----------------------------------------------------------------------
+ my (
+ $self,
+ $seqno_string,
+ $last_nonblank_seqno_string,
- my $seqno_string = get_seqno_string();
- my $last_nonblank_seqno_string = get_last_nonblank_seqno_string();
+ @args_to_D,
+ ) = @_;
# Dump any saved lines if we see a line with an unbalanced opening or
# closing token.
# Either store or write this line
if ($valign_buffer_filling) {
- push @valign_buffer, [@args];
+ push @valign_buffer, [@args_to_D];
}
else {
- $self->valign_output_step_D(@args);
+ $self->valign_output_step_D(@args_to_D);
}
# For lines starting or ending with opening or closing tokens..
# Start storing lines when we see a line with multiple stacked
# opening tokens.
# patch for RT #94354, requested by Colin Williams
- if ( $seqno_string =~ /^\d+(\:+\d+)+$/
- && $args[0] !~ /^[\}\)\]\:\?]/ )
+ if ( index( $seqno_string, ':' ) >= 0
+ && $seqno_string =~ /^\d+(\:+\d+)+$/
+ && $args_to_D[0] !~ /^[\}\)\]\:\?]/ )
{
# This test is efficient but a little subtle: The first test
sub valign_output_step_D {
- ###############################################################
+ #----------------------------------------------------------------
# This is Step D in writing vertically aligned lines.
# It is the end of the vertical alignment pipeline.
# Write one vertically aligned line of code to the output object.
- ###############################################################
+ #----------------------------------------------------------------
my ( $self, $line, $leading_space_count, $level, $Kend ) = @_;
$leading_space_count % $rOpts_entab_leading_whitespace;
my $tab_count =
int( $leading_space_count / $rOpts_entab_leading_whitespace );
- my $leading_string = "\t" x $tab_count . ' ' x $space_count;
+ my $leading_string = "\t" x $tab_count . SPACE x $space_count;
if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
substr( $line, 0, $leading_space_count ) = $leading_string;
}
"Error entabbing in valign_output_step_D: for level=$level count=$leading_space_count\n"
);
}
- $leading_string = ( ' ' x $leading_space_count );
+ $leading_string = ( SPACE x $leading_space_count );
}
else {
- $leading_string .= ( ' ' x $space_count );
+ $leading_string .= ( SPACE x $space_count );
}
if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
substr( $line, 0, $leading_space_count ) = $leading_string;
# Handle case of zero whitespace, which includes multi-line quotes
# (which may have a finite level; this prevents tab problems)
if ( $leading_whitespace_count <= 0 ) {
- return "";
+ return EMPTY_STRING;
}
# look for previous result
if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
|| $rOpts_indent_columns <= 0 )
{
- $leading_string = ( ' ' x $leading_whitespace_count );
+ $leading_string = ( SPACE x $leading_whitespace_count );
}
# Handle entab option
$leading_whitespace_count % $rOpts_entab_leading_whitespace;
my $tab_count = int(
$leading_whitespace_count / $rOpts_entab_leading_whitespace );
- $leading_string = "\t" x $tab_count . ' ' x $space_count;
+ $leading_string = "\t" x $tab_count . SPACE x $space_count;
}
# Handle option of one tab per level
);
# -- skip entabbing
- $leading_string = ( ' ' x $leading_whitespace_count );
+ $leading_string = ( SPACE x $leading_whitespace_count );
}
else {
- $leading_string .= ( ' ' x $space_count );
+ $leading_string .= ( SPACE x $space_count );
}
}
$leading_string_cache[$leading_whitespace_count] = $leading_string;