X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=lib%2FPerl%2FTidy%2FVerticalAligner.pm;h=7613ee29dfb55c04b94ff20b4ebb990a82093336;hb=57d829ae0e2c75828f8ecc9c7139579350927dbc;hp=7efabb58f1b22e59e5649fcfa40a1530625e2803;hpb=7f27e55dce5925b2bbe8fcfca64f385e917a52be;p=perltidy.git diff --git a/lib/Perl/Tidy/VerticalAligner.pm b/lib/Perl/Tidy/VerticalAligner.pm index 7efabb5..7613ee2 100644 --- a/lib/Perl/Tidy/VerticalAligner.pm +++ b/lib/Perl/Tidy/VerticalAligner.pm @@ -1,7 +1,7 @@ package Perl::Tidy::VerticalAligner; use strict; use warnings; -our $VERSION = '20200110'; +our $VERSION = '20210717'; use Perl::Tidy::VerticalAligner::Alignment; use Perl::Tidy::VerticalAligner::Line; @@ -10,183 +10,248 @@ use Perl::Tidy::VerticalAligner::Line; # attempts to line up certain common tokens, such as => and #, which are # identified by the calling routine. # -# There are two main routines: valign_input and flush. Append acts as a -# storage buffer, collecting lines into a group which can be vertically -# aligned. When alignment is no longer possible or desirable, it dumps -# the group to flush. +# Usage: +# - Initiate an object with a call to new(). +# - Write lines one-by-one with calls to valign_input(). +# - Make a final call to flush() to empty the pipeline. # -# valign_input -----> flush +# The sub valign_input collects lines into groups. When a group reaches +# the maximum possible size it is processed for alignment and output. +# The maximum group size is reached whenerver there is a change in indentation +# level, a blank line, a block comment, or an external flush call. The calling +# routine may also force a break in alignment at any time. # -# collects writes -# vertical one -# groups group +# If the calling routine needs to interrupt the output and send other text to +# the output, it must first call flush() to empty the output pipeline. This +# might occur for example if a block of pod text needs to be sent to the output +# between blocks of code. + +# It is essential that a final call to flush() be made. Otherwise some +# final lines of text will be lost. + +# Index... +# CODE SECTION 1: Preliminary code, global definitions and sub new +# sub new +# CODE SECTION 2: Some Basic Utilities +# CODE SECTION 3: Code to accept input and form groups +# sub valign_input +# CODE SECTION 4: Code to process comment lines +# sub _flush_comment_lines +# CODE SECTION 5: Code to process groups of code lines +# sub _flush_group_lines +# CODE SECTION 6: Output Step A +# sub valign_output_step_A +# CODE SECTION 7: Output Step B +# sub valign_output_step_B +# CODE SECTION 8: Output Step C +# sub valign_output_step_C +# CODE SECTION 9: Output Step D +# sub valign_output_step_D +# CODE SECTION 10: Summary +# sub report_anything_unusual + +################################################################## +# CODE SECTION 1: Preliminary code, global definitions and sub new +################################################################## + +sub AUTOLOAD { + + # Catch any undefined sub calls so that we are sure to get + # some diagnostic information. This sub should never be called + # except for a programming error. + our $AUTOLOAD; + return if ( $AUTOLOAD =~ /\bDESTROY$/ ); + my ( $pkg, $fname, $lno ) = caller(); + my $my_package = __PACKAGE__; + print STDERR < $i++, + _logger_object_ => $i++, + _diagnostics_object_ => $i++, + _length_function_ => $i++, + + _rOpts_ => $i++, + _rOpts_indent_columns_ => $i++, + _rOpts_tabs_ => $i++, + _rOpts_entab_leading_whitespace_ => $i++, + _rOpts_fixed_position_side_comment_ => $i++, + _rOpts_minimum_space_to_comment_ => $i++, + _rOpts_maximum_line_length_ => $i++, + _rOpts_variable_maximum_line_length_ => $i++, + _rOpts_valign_ => $i++, + + _last_level_written_ => $i++, + _last_side_comment_column_ => $i++, + _last_side_comment_line_number_ => $i++, + _last_side_comment_length_ => $i++, + _last_side_comment_level_ => $i++, + _outdented_line_count_ => $i++, + _first_outdented_line_at_ => $i++, + _last_outdented_line_at_ => $i++, + _consecutive_block_comments_ => $i++, + + _rgroup_lines_ => $i++, + _group_level_ => $i++, + _group_type_ => $i++, + _zero_count_ => $i++, + _last_leading_space_count_ => $i++, + _comment_leading_space_count_ => $i++, + }; + + # Debug flag. This is a relic from the original program development + # looking for problems with tab characters. Caution: this debug flag can + # produce a lot of output It should be 0 except when debugging small + # scripts. - use constant VALIGN_DEBUG_FLAG_APPEND => 0; - use constant VALIGN_DEBUG_FLAG_APPEND0 => 0; - use constant VALIGN_DEBUG_FLAG_TERNARY => 0; - use constant VALIGN_DEBUG_FLAG_TABS => 0; + use constant DEBUG_TABS => 0; my $debug_warning = sub { print STDOUT "VALIGN_DEBUGGING with key $_[0]\n"; return; }; - VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND'); - VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0'); - VALIGN_DEBUG_FLAG_TERNARY && $debug_warning->('TERNARY'); - VALIGN_DEBUG_FLAG_TABS && $debug_warning->('TABS'); + DEBUG_TABS && $debug_warning->('TABS'); } -use vars qw( - $vertical_aligner_self - $maximum_alignment_index - $ralignment_list - $maximum_jmax_seen - $minimum_jmax_seen - $previous_minimum_jmax_seen - $previous_maximum_jmax_seen - @group_lines - $group_level - $group_type - $group_maximum_gap - $marginal_match - $last_level_written - $last_leading_space_count - $extra_indent_ok - $zero_count - $last_comment_column - $last_side_comment_line_number - $last_side_comment_length - $last_side_comment_level - $outdented_line_count - $first_outdented_line_at - $last_outdented_line_at - $diagnostics_object - $logger_object - $file_writer_object - @side_comment_history - $comment_leading_space_count - $is_matching_terminal_line - $consecutive_block_comments - - $cached_line_text - $cached_line_type - $cached_line_flag - $cached_seqno - $cached_line_valid - $cached_line_leading_space_count - $cached_seqno_string - - $valign_buffer_filling - @valign_buffer - - $seqno_string - $last_nonblank_seqno_string - - $rOpts - - $rOpts_maximum_line_length - $rOpts_variable_maximum_line_length - $rOpts_continuation_indentation - $rOpts_indent_columns - $rOpts_tabs - $rOpts_entab_leading_whitespace - $rOpts_valign - - $rOpts_fixed_position_side_comment - $rOpts_minimum_space_to_comment - -); - -sub initialize { - - ( - my $class, $rOpts, $file_writer_object, $logger_object, - $diagnostics_object - ) = @_; - - # variables describing the entire space group: - $ralignment_list = []; - $group_level = 0; - $last_level_written = -1; - $extra_indent_ok = 0; # can we move all lines to the right? - $last_side_comment_length = 0; - $maximum_jmax_seen = 0; - $minimum_jmax_seen = 0; - $previous_minimum_jmax_seen = 0; - $previous_maximum_jmax_seen = 0; - - # variables describing each line of the group - @group_lines = (); # list of all lines in group - - $outdented_line_count = 0; - $first_outdented_line_at = 0; - $last_outdented_line_at = 0; - $last_side_comment_line_number = 0; - $last_side_comment_level = -1; - $is_matching_terminal_line = 0; - - # most recent 3 side comments; [ line number, column ] - $side_comment_history[0] = [ -300, 0 ]; - $side_comment_history[1] = [ -200, 0 ]; - $side_comment_history[2] = [ -100, 0 ]; - - # valign_output_step_B cache: - $cached_line_text = ""; - $cached_line_type = 0; - $cached_line_flag = 0; - $cached_seqno = 0; - $cached_line_valid = 0; - $cached_line_leading_space_count = 0; - $cached_seqno_string = ""; - - # string of sequence numbers joined together - $seqno_string = ""; - $last_nonblank_seqno_string = ""; - - # frequently used parameters - $rOpts_indent_columns = $rOpts->{'indent-columns'}; - $rOpts_tabs = $rOpts->{'tabs'}; - $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'}; - $rOpts_fixed_position_side_comment = +sub new { + + my ( $class, @args ) = @_; + + my %defaults = ( + rOpts => undef, + file_writer_object => undef, + logger_object => undef, + diagnostics_object => undef, + length_function => sub { return length( $_[0] ) }, + ); + my %args = ( %defaults, @args ); + + # Initialize other caches and buffers + initialize_step_B_cache(); + initialize_valign_buffer(); + initialize_leading_string_cache(); + initialize_decode(); + + # Initialize all variables in $self. + # To add an item to $self, first define a new constant index in the BEGIN + # section. + my $self = []; + + # objects + $self->[_file_writer_object_] = $args{file_writer_object}; + $self->[_logger_object_] = $args{logger_object}; + $self->[_diagnostics_object_] = $args{diagnostics_object}; + $self->[_length_function_] = $args{length_function}; + + # shortcuts to user options + my $rOpts = $args{rOpts}; + + $self->[_rOpts_] = $rOpts; + $self->[_rOpts_indent_columns_] = $rOpts->{'indent-columns'}; + $self->[_rOpts_tabs_] = $rOpts->{'tabs'}; + $self->[_rOpts_entab_leading_whitespace_] = + $rOpts->{'entab-leading-whitespace'}; + $self->[_rOpts_fixed_position_side_comment_] = $rOpts->{'fixed-position-side-comment'}; - $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'}; - $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; - $rOpts_variable_maximum_line_length = + $self->[_rOpts_minimum_space_to_comment_] = + $rOpts->{'minimum-space-to-comment'}; + $self->[_rOpts_maximum_line_length_] = $rOpts->{'maximum-line-length'}; + $self->[_rOpts_variable_maximum_line_length_] = $rOpts->{'variable-maximum-line-length'}; - $rOpts_valign = $rOpts->{'valign'}; + $self->[_rOpts_valign_] = $rOpts->{'valign'}; + + # Batch of lines being collected + $self->[_rgroup_lines_] = []; + $self->[_group_level_] = 0; + $self->[_group_type_] = ""; + $self->[_zero_count_] = 0; + $self->[_comment_leading_space_count_] = 0; + $self->[_last_leading_space_count_] = 0; + + # Memory of what has been processed + $self->[_last_level_written_] = -1; + $self->[_last_side_comment_column_] = 0; + $self->[_last_side_comment_line_number_] = 0; + $self->[_last_side_comment_length_] = 0; + $self->[_last_side_comment_level_] = -1; + $self->[_outdented_line_count_] = 0; + $self->[_first_outdented_line_at_] = 0; + $self->[_last_outdented_line_at_] = 0; + $self->[_consecutive_block_comments_] = 0; + + bless $self, $class; + return $self; +} + +################################# +# CODE SECTION 2: Basic Utilities +################################# + +sub flush { + + # flush() is the external call to completely empty the pipeline. + my ($self) = @_; + + # push things out the pipline... - $consecutive_block_comments = 0; - forget_side_comment(); + # push out any current group lines + $self->_flush_group_lines(); - initialize_for_new_group(); + # then anything left in the cache of step_B + $self->_flush_cache(); - $vertical_aligner_self = {}; - bless $vertical_aligner_self, $class; - return $vertical_aligner_self; + # then anything left in the buffer of step_C + $self->dump_valign_buffer(); + + return; } sub initialize_for_new_group { - @group_lines = (); - $maximum_alignment_index = -1; # alignments in current group - $zero_count = 0; # count consecutive lines without tokens - $group_maximum_gap = 0; # largest gap introduced - $group_type = ""; - $marginal_match = 0; - $comment_leading_space_count = 0; - $last_leading_space_count = 0; + my ($self) = @_; + + $self->[_rgroup_lines_] = []; + $self->[_group_type_] = ""; + $self->[_zero_count_] = 0; + $self->[_comment_leading_space_count_] = 0; + $self->[_last_leading_space_count_] = 0; + + # Note that the value for _group_level_ is + # handled separately in sub valign_input return; } +sub group_line_count { + return +@{ $_[0]->[_rgroup_lines_] }; +} + # interface to Perl::Tidy::Diagnostics routines +# For debugging; not currently used sub write_diagnostics { - my $msg = shift; + my ( $self, $msg ) = @_; + my $diagnostics_object = $self->[_diagnostics_object_]; if ($diagnostics_object) { $diagnostics_object->write_diagnostics($msg); } @@ -195,7 +260,8 @@ sub write_diagnostics { # interface to Perl::Tidy::Logger routines sub warning { - my ($msg) = @_; + my ( $self, $msg ) = @_; + my $logger_object = $self->[_logger_object_]; if ($logger_object) { $logger_object->warning($msg); } @@ -203,7 +269,8 @@ sub warning { } sub write_logfile_entry { - my ($msg) = @_; + my ( $self, $msg ) = @_; + my $logger_object = $self->[_logger_object_]; if ($logger_object) { $logger_object->write_logfile_entry($msg); } @@ -211,6 +278,8 @@ sub write_logfile_entry { } sub report_definite_bug { + my ( $self, $msg ) = @_; + my $logger_object = $self->[_logger_object_]; if ($logger_object) { $logger_object->report_definite_bug(); } @@ -219,7 +288,7 @@ sub report_definite_bug { sub get_cached_line_count { my $self = shift; - return @group_lines + ( $cached_line_type ? 1 : 0 ); + return $self->group_line_count() + ( get_cached_line_type() ? 1 : 0 ); } sub get_spaces { @@ -240,85 +309,33 @@ sub get_recoverable_spaces { return ref($indentation) ? $indentation->get_recoverable_spaces() : 0; } -sub get_stack_depth { - - my $indentation = shift; - return ref($indentation) ? $indentation->get_stack_depth() : 0; -} - -sub make_alignment { - my ( $col, $token ) = @_; - - # make one new alignment at column $col which aligns token $token - ++$maximum_alignment_index; - - #my $alignment = new Perl::Tidy::VerticalAligner::Alignment( - my $nlines = @group_lines; - my $alignment = Perl::Tidy::VerticalAligner::Alignment->new( - column => $col, - starting_column => $col, - matching_token => $token, - starting_line => $nlines - 1, - ending_line => $nlines - 1, - serial_number => $maximum_alignment_index, - ); - $ralignment_list->[$maximum_alignment_index] = $alignment; - return $alignment; -} - -sub dump_alignments { - print STDOUT -"Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n"; - for my $i ( 0 .. $maximum_alignment_index ) { - my $column = $ralignment_list->[$i]->get_column(); - my $starting_column = $ralignment_list->[$i]->get_starting_column(); - my $matching_token = $ralignment_list->[$i]->get_matching_token(); - my $starting_line = $ralignment_list->[$i]->get_starting_line(); - my $ending_line = $ralignment_list->[$i]->get_ending_line(); - print STDOUT -"$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n"; - } - return; -} - -sub save_alignment_columns { - for my $i ( 0 .. $maximum_alignment_index ) { - $ralignment_list->[$i]->save_column(); - } - return; -} - -sub restore_alignment_columns { - for my $i ( 0 .. $maximum_alignment_index ) { - $ralignment_list->[$i]->restore_column(); - } - return; -} - -sub forget_side_comment { - $last_comment_column = 0; - return; -} - sub maximum_line_length_for_level { # return maximum line length for line starting with a given level - my $maximum_line_length = $rOpts_maximum_line_length; - if ($rOpts_variable_maximum_line_length) { - my $level = shift; + my ( $self, $level ) = @_; + my $maximum_line_length = $self->[_rOpts_maximum_line_length_]; + if ( $self->[_rOpts_variable_maximum_line_length_] ) { if ( $level < 0 ) { $level = 0 } - $maximum_line_length += $level * $rOpts_indent_columns; + $maximum_line_length += $level * $self->[_rOpts_indent_columns_]; } return $maximum_line_length; } +###################################################### +# CODE SECTION 3: Code to accept input and form groups +###################################################### + sub push_group_line { - my ($new_line) = @_; - push @group_lines, $new_line; + my ( $self, $new_line ) = @_; + my $rgroup_lines = $self->[_rgroup_lines_]; + push @{$rgroup_lines}, $new_line; return; } +use constant DEBUG_VALIGN => 0; +use constant SC_LONG_LINE_DIFF => 12; + sub valign_input { # Place one line in the current vertical group. @@ -359,7 +376,7 @@ sub valign_input { # the matching tokens, and the last one tracks the maximum line length. # # Each time a new line comes in, it joins the current vertical - # group if possible. Otherwise it causes the current group to be dumped + # group if possible. Otherwise it causes the current group to be flushed # and a new group is started. # # For each new group member, the column locations are increased, as @@ -372,17 +389,30 @@ sub valign_input { # side comments. Tabs in these fields can mess up the column counting. # The log file warns the user if there are any such tabs. - my ( $rline_hash, $rfields, $rtokens, $rpatterns ) = @_; + my ( $self, $rline_hash ) = @_; + my $level = $rline_hash->{level}; my $level_end = $rline_hash->{level_end}; + my $level_adj = $rline_hash->{level_adj}; my $indentation = $rline_hash->{indentation}; - my $is_forced_break = $rline_hash->{is_forced_break}; + my $list_seqno = $rline_hash->{list_seqno}; my $outdent_long_lines = $rline_hash->{outdent_long_lines}; my $is_terminal_ternary = $rline_hash->{is_terminal_ternary}; - my $is_terminal_statement = $rline_hash->{is_terminal_statement}; - my $do_not_pad = $rline_hash->{do_not_pad}; my $rvertical_tightness_flags = $rline_hash->{rvertical_tightness_flags}; my $level_jump = $rline_hash->{level_jump}; + my $rfields = $rline_hash->{rfields}; + my $rtokens = $rline_hash->{rtokens}; + my $rpatterns = $rline_hash->{rpatterns}; + my $rfield_lengths = $rline_hash->{rfield_lengths}; + my $terminal_block_type = $rline_hash->{terminal_block_type}; + my $batch_count = $rline_hash->{batch_count}; + my $break_alignment_before = $rline_hash->{break_alignment_before}; + my $break_alignment_after = $rline_hash->{break_alignment_after}; + my $Kend = $rline_hash->{Kend}; + my $ci_level = $rline_hash->{ci_level}; + + # The index '$Kend' is a value which passed along with the line text to sub + # 'write_code_line' for a convergence check. # number of fields is $jmax # number of tokens between fields is $jmax-1 @@ -392,80 +422,92 @@ sub valign_input { # set outdented flag to be sure we either align within statements or # across statement boundaries, but not both. - my $is_outdented = $last_leading_space_count > $leading_space_count; - $last_leading_space_count = $leading_space_count; + my $is_outdented = + $self->[_last_leading_space_count_] > $leading_space_count; + $self->[_last_leading_space_count_] = $leading_space_count; - # Patch: undo for hanging side comment + # Identify a hanging side comment. Hanging side comments have an empty + # initial field. my $is_hanging_side_comment = ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ ); + + # Undo outdented flag for a hanging side comment $is_outdented = 0 if $is_hanging_side_comment; - # Forget side comment alignment after seeing 2 or more block comments - my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ ); + # Identify a block comment. + my $is_block_comment = $jmax == 0 && substr( $rfields->[0], 0, 1 ) eq '#'; + + # Block comment .. update count if ($is_block_comment) { - $consecutive_block_comments++; + $self->[_consecutive_block_comments_]++; } + + # Not a block comment .. + # Forget side comment column if we saw 2 or more block comments, + # and reset the count else { - if ( $consecutive_block_comments > 1 ) { forget_side_comment() } - $consecutive_block_comments = 0; + + if ( $self->[_consecutive_block_comments_] > 1 ) { + $self->forget_side_comment(); + } + $self->[_consecutive_block_comments_] = 0; + } + + # Reset side comment location if we are entering a new block from level 0. + # This is intended to keep them from drifting too far to the right. + if ( $terminal_block_type && $level_adj == 0 && $level_end > $level ) { + $self->forget_side_comment(); } - VALIGN_DEBUG_FLAG_APPEND0 && do { - my $nlines = @group_lines; + my $group_level = $self->[_group_level_]; + + DEBUG_VALIGN && do { + my $nlines = $self->group_line_count(); print STDOUT -"APPEND0: entering lines=$nlines new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break, level_jump=$level_jump, level=$level, group_level=$group_level, level_jump=$level_jump\n"; +"Entering valign_input: lines=$nlines new #fields= $jmax, leading_count=$leading_space_count, level_jump=$level_jump, level=$level, group_level=$group_level, level_jump=$level_jump\n"; }; # Validate cached line if necessary: If we can produce a container # with just 2 lines total by combining an existing cached opening # token with the closing token to follow, then we will mark both # cached flags as valid. - if ($rvertical_tightness_flags) { - if ( @group_lines <= 1 - && $cached_line_type - && $cached_seqno - && $rvertical_tightness_flags->[2] - && $rvertical_tightness_flags->[2] == $cached_seqno ) - { - $rvertical_tightness_flags->[3] ||= 1; - $cached_line_valid ||= 1; + my $cached_line_type = get_cached_line_type(); + if ($cached_line_type) { + my $cached_line_flag = get_cached_line_flag(); + if ($rvertical_tightness_flags) { + my $cached_seqno = get_cached_seqno(); + if ( $cached_seqno + && $self->group_line_count() <= 1 + && $rvertical_tightness_flags->[2] + && $rvertical_tightness_flags->[2] == $cached_seqno ) + { + $rvertical_tightness_flags->[3] ||= 1; + set_cached_line_valid(1); + } } - } - # do not join an opening block brace with an unbalanced line - # unless requested with a flag value of 2 - if ( $cached_line_type == 3 - && !@group_lines - && $cached_line_flag < 2 - && $level_jump != 0 ) - { - $cached_line_valid = 0; + # do not join an opening block brace with an unbalanced line + # unless requested with a flag value of 2 + if ( $cached_line_type == 3 + && !$self->group_line_count() + && $cached_line_flag < 2 + && $level_jump != 0 ) + { + set_cached_line_valid(0); + } } - # patch until new aligner is finished - if ($do_not_pad) { my_flush() } - # shouldn't happen: if ( $level < 0 ) { $level = 0 } # do not align code across indentation level changes # or if vertical alignment is turned off for debugging - if ( $level != $group_level || $is_outdented || !$rOpts_valign ) { - - # we are allowed to shift a group of lines to the right if its - # level is greater than the previous and next group - $extra_indent_ok = - ( $level < $group_level && $last_level_written < $group_level ); - - my_flush(); + if ( $level != $group_level || $is_outdented || !$self->[_rOpts_valign_] ) { - # If we know that this line will get flushed out by itself because - # of level changes, we can leave the extra_indent_ok flag set. - # That way, if we get an external flush call, we will still be - # able to do some -lp alignment if necessary. - $extra_indent_ok = ( $is_terminal_statement && $level > $group_level ); + $self->_flush_group_lines( $level - $group_level ); $group_level = $level; + $self->[_group_level_] = $group_level; # wait until after the above flush to get the leading space # count because it may have been changed if the -icp flag is in @@ -478,33 +520,43 @@ sub valign_input { # Collect outdentable block COMMENTS # -------------------------------------------------------------------- my $is_blank_line = ""; - if ( $group_type eq 'COMMENT' ) { + if ( $self->[_group_type_] eq 'COMMENT' ) { if ( ( $is_block_comment && $outdent_long_lines - && $leading_space_count == $comment_leading_space_count + && $leading_space_count == + $self->[_comment_leading_space_count_] ) || $is_blank_line ) { - push_group_line( $rfields->[0] ); + + # Note that for a comment group we are not storing a line + # but rather just the text and its length. + $self->push_group_line( + [ $rfields->[0], $rfield_lengths->[0], $Kend ] ); return; } else { - my_flush(); + $self->_flush_group_lines(); } } + my $rgroup_lines = $self->[_rgroup_lines_]; + if ( $break_alignment_before && @{$rgroup_lines} ) { + $rgroup_lines->[-1]->set_end_group(1); + } + # -------------------------------------------------------------------- # add dummy fields for terminal ternary # -------------------------------------------------------------------- my $j_terminal_match; - if ( $is_terminal_ternary && @group_lines ) { + if ( $is_terminal_ternary && @{$rgroup_lines} ) { $j_terminal_match = - fix_terminal_ternary( $group_lines[-1], $rfields, $rtokens, - $rpatterns ); + fix_terminal_ternary( $rgroup_lines->[-1], $rfields, $rtokens, + $rpatterns, $rfield_lengths, $group_level, ); $jmax = @{$rfields} - 1; } @@ -512,13 +564,17 @@ sub valign_input { # add dummy fields for else statement # -------------------------------------------------------------------- - if ( $rfields->[0] =~ /^else\s*$/ - && @group_lines + # Note the trailing space after 'else' here. If there were no space between + # the else and the next '{' then we would not be able to do vertical + # alignment of the '{'. + if ( $rfields->[0] eq 'else ' + && @{$rgroup_lines} && $level_jump == 0 ) { $j_terminal_match = - fix_terminal_else( $group_lines[-1], $rfields, $rtokens, $rpatterns ); + fix_terminal_else( $rgroup_lines->[-1], $rfields, $rtokens, + $rpatterns, $rfield_lengths ); $jmax = @{$rfields} - 1; } @@ -526,131 +582,132 @@ sub valign_input { # Handle simple line of code with no fields to match. # -------------------------------------------------------------------- if ( $jmax <= 0 ) { - $zero_count++; + $self->[_zero_count_]++; - if ( @group_lines - && !get_recoverable_spaces( $group_lines[0]->get_indentation() ) ) + if ( @{$rgroup_lines} + && !get_recoverable_spaces( $rgroup_lines->[0]->get_indentation() ) + ) { # flush the current group if it has some aligned columns.. - if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() } - - # flush current group if we are just collecting side comments.. - elsif ( - - # ...and we haven't seen a comment lately - ( $zero_count > 3 ) - - # ..or if this new line doesn't fit to the left of the comments - || ( ( $leading_space_count + length( $rfields->[0] ) ) > - $group_lines[0]->get_column(0) ) - ) + # or we haven't seen a comment lately + if ( $rgroup_lines->[0]->get_jmax() > 1 + || $self->[_zero_count_] > 3 ) { - my_flush(); + $self->_flush_group_lines(); } } # start new COMMENT group if this comment may be outdented if ( $is_block_comment && $outdent_long_lines - && !@group_lines ) + && !$self->group_line_count() ) { - $group_type = 'COMMENT'; - $comment_leading_space_count = $leading_space_count; - push_group_line( $rfields->[0] ); + $self->[_group_type_] = 'COMMENT'; + $self->[_comment_leading_space_count_] = $leading_space_count; + $self->push_group_line( + [ $rfields->[0], $rfield_lengths->[0], $Kend ] ); return; } # just write this line directly if no current group, no side comment, # and no space recovery is needed. - if ( !@group_lines && !get_recoverable_spaces($indentation) ) { - valign_output_step_B( $leading_space_count, $rfields->[0], 0, - $outdent_long_lines, $rvertical_tightness_flags, $level ); + if ( !$self->group_line_count() + && !get_recoverable_spaces($indentation) ) + { + + $self->valign_output_step_B( + { + leading_space_count => $leading_space_count, + line => $rfields->[0], + line_length => $rfield_lengths->[0], + side_comment_length => 0, + outdent_long_lines => $outdent_long_lines, + rvertical_tightness_flags => $rvertical_tightness_flags, + level => $level, + level_end => $level_end, + Kend => $Kend, + } + ); + return; } } else { - $zero_count = 0; + $self->[_zero_count_] = 0; } - # programming check: (shouldn't happen) - # an error here implies an incorrect call was made - if ( @{$rfields} && ( @{$rtokens} != ( @{$rfields} - 1 ) ) ) { - my $nt = @{$rtokens}; - my $nf = @{$rfields}; - warning( -"Program bug in Perl::Tidy::VerticalAligner - number of tokens = $nt should be one less than number of fields: $nf)\n" - ); - report_definite_bug(); - } - my $maximum_line_length_for_level = maximum_line_length_for_level($level); + my $maximum_line_length_for_level = + $self->maximum_line_length_for_level($level); # -------------------------------------------------------------------- - # create an object to hold this line + # It simplifies things to create a zero length side comment + # if none exists. # -------------------------------------------------------------------- - my $new_line = Perl::Tidy::VerticalAligner::Line->new( - jmax => $jmax, - jmax_original_line => $jmax, - rtokens => $rtokens, - rfields => $rfields, - rpatterns => $rpatterns, - indentation => $indentation, - leading_space_count => $leading_space_count, - outdent_long_lines => $outdent_long_lines, - list_type => "", - is_hanging_side_comment => $is_hanging_side_comment, - maximum_line_length => $maximum_line_length_for_level, - rvertical_tightness_flags => $rvertical_tightness_flags, - is_terminal_ternary => $is_terminal_ternary, - j_terminal_match => $j_terminal_match, - ); + $self->make_side_comment( $rtokens, $rfields, $rpatterns, $rfield_lengths ); + $jmax = @{$rfields} - 1; # -------------------------------------------------------------------- - # It simplifies things to create a zero length side comment - # if none exists. + # create an object to hold this line # -------------------------------------------------------------------- - make_side_comment( $new_line, $level_end ); + my $new_line = Perl::Tidy::VerticalAligner::Line->new( + { + jmax => $jmax, + rtokens => $rtokens, + rfields => $rfields, + rpatterns => $rpatterns, + rfield_lengths => $rfield_lengths, + indentation => $indentation, + leading_space_count => $leading_space_count, + outdent_long_lines => $outdent_long_lines, + list_seqno => $list_seqno, + list_type => "", + is_hanging_side_comment => $is_hanging_side_comment, + maximum_line_length => $maximum_line_length_for_level, + rvertical_tightness_flags => $rvertical_tightness_flags, + is_terminal_ternary => $is_terminal_ternary, + j_terminal_match => $j_terminal_match, + end_group => $break_alignment_after, + Kend => $Kend, + ci_level => $ci_level, + level => $level, + level_end => $level_end, + imax_pair => -1, + } + ); # -------------------------------------------------------------------- # Decide if this is a simple list of items. - # There are 3 list types: none, comma, comma-arrow. - # We use this below to be less restrictive in deciding what to align. + # We use this to be less restrictive in deciding what to align. # -------------------------------------------------------------------- - if ($is_forced_break) { - decide_if_list($new_line); - } + decide_if_list($new_line) if ($list_seqno); # -------------------------------------------------------------------- # Append this line to the current group (or start new group) # -------------------------------------------------------------------- - if ( !@group_lines ) { - add_to_group($new_line); - } - else { - push_group_line($new_line); - } + + $self->push_group_line($new_line); # output this group if it ends in a terminal else or ternary line if ( defined($j_terminal_match) ) { - my_flush(); + $self->_flush_group_lines(); } # Force break after jump to lower level if ( $level_jump < 0 ) { - my_flush(); + $self->_flush_group_lines($level_jump); } # -------------------------------------------------------------------- # Some old debugging stuff # -------------------------------------------------------------------- - VALIGN_DEBUG_FLAG_APPEND && do { - print STDOUT "APPEND fields:"; + DEBUG_VALIGN && do { + print STDOUT "exiting valign_input fields:"; dump_array( @{$rfields} ); - print STDOUT "APPEND tokens:"; + print STDOUT "exiting valign_input tokens:"; dump_array( @{$rtokens} ); - print STDOUT "APPEND patterns:"; + print STDOUT "exiting valign_input patterns:"; dump_array( @{$rpatterns} ); - dump_alignments(); }; return; @@ -658,363 +715,107 @@ sub valign_input { sub join_hanging_comment { - my $line = shift; - my $jmax = $line->get_jmax(); - return 0 unless $jmax == 1; # must be 2 fields - my $rtokens = $line->get_rtokens(); - return 0 unless $rtokens->[0] eq '#'; # the second field is a comment.. - my $rfields = $line->get_rfields(); - return 0 unless $rfields->[0] =~ /^\s*$/; # the first field is empty... - my $old_line = shift; + # Add dummy fields to a hanging side comment to make it look + # like the first line in its potential group. This simplifies + # the coding. + my ( $new_line, $old_line ) = @_; + + my $jmax = $new_line->get_jmax(); + + # must be 2 fields + return 0 unless $jmax == 1; + my $rtokens = $new_line->get_rtokens(); + + # the second field must be a comment + return 0 unless $rtokens->[0] eq '#'; + my $rfields = $new_line->get_rfields(); + + # the first field must be empty + return 0 unless $rfields->[0] =~ /^\s*$/; + + # the current line must have fewer fields my $maximum_field_index = $old_line->get_jmax(); return 0 - unless $maximum_field_index > $jmax; # the current line has more fields - my $rpatterns = $line->get_rpatterns(); + unless $maximum_field_index > $jmax; + + # looks ok.. + my $rpatterns = $new_line->get_rpatterns(); + my $rfield_lengths = $new_line->get_rfield_lengths(); - $line->set_is_hanging_side_comment(1); + $new_line->set_is_hanging_side_comment(1); $jmax = $maximum_field_index; - $line->set_jmax($jmax); + $new_line->set_jmax($jmax); $rfields->[$jmax] = $rfields->[1]; + $rfield_lengths->[$jmax] = $rfield_lengths->[1]; $rtokens->[ $jmax - 1 ] = $rtokens->[0]; $rpatterns->[ $jmax - 1 ] = $rpatterns->[0]; foreach my $j ( 1 .. $jmax - 1 ) { - $rfields->[$j] = " "; # NOTE: caused glitch unless 1 blank, why? + $rfields->[$j] = ''; + $rfield_lengths->[$j] = 0; $rtokens->[ $j - 1 ] = ""; $rpatterns->[ $j - 1 ] = ""; } return 1; } -sub eliminate_old_fields { - - my $new_line = shift; - my $jmax = $new_line->get_jmax(); - if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax } - if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax } - - # there must be one previous line - return unless ( @group_lines == 1 ); - - my $old_line = shift; - my $maximum_field_index = $old_line->get_jmax(); - - ############################################### - # Moved below to allow new coding for => matches - # return unless $maximum_field_index > $jmax; - ############################################### - - # Identify specific cases where field elimination is allowed: - # case=1: both lines have comma-separated lists, and the first - # line has an equals - # case=2: both lines have leading equals - - # case 1 is the default - my $case = 1; - - # See if case 2: both lines have leading '=' - # We'll require similar leading patterns in this case - my $old_rtokens = $old_line->get_rtokens(); - my $rtokens = $new_line->get_rtokens(); - my $rpatterns = $new_line->get_rpatterns(); - my $old_rpatterns = $old_line->get_rpatterns(); - if ( $rtokens->[0] =~ /^=>?\d*$/ - && $old_rtokens->[0] eq $rtokens->[0] - && $old_rpatterns->[0] eq $rpatterns->[0] ) - { - $case = 2; - } - - # not too many fewer fields in new line for case 1 - return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax ); - - # case 1 must have side comment - my $old_rfields = $old_line->get_rfields(); - return - if ( $case == 1 - && length( $old_rfields->[$maximum_field_index] ) == 0 ); - - my $rfields = $new_line->get_rfields(); - - my $hid_equals = 0; - - my @new_alignments = (); - my @new_fields = (); - my @new_matching_patterns = (); - my @new_matching_tokens = (); - - my $j = 0; - my $current_field = ''; - my $current_pattern = ''; - - # loop over all old tokens - my $in_match = 0; - foreach my $k ( 0 .. $maximum_field_index - 1 ) { - $current_field .= $old_rfields->[$k]; - $current_pattern .= $old_rpatterns->[$k]; - last if ( $j > $jmax - 1 ); - - if ( $old_rtokens->[$k] eq $rtokens->[$j] ) { - $in_match = 1; - $new_fields[$j] = $current_field; - $new_matching_patterns[$j] = $current_pattern; - $current_field = ''; - $current_pattern = ''; - $new_matching_tokens[$j] = $old_rtokens->[$k]; - $new_alignments[$j] = $old_line->get_alignment($k); - $j++; - } - else { - - if ( $old_rtokens->[$k] =~ /^\=\d*$/ ) { - last if ( $case == 2 ); # avoid problems with stuff - # like: $a=$b=$c=$d; - $hid_equals = 1; - } - last - if ( $in_match && $case == 1 ) - ; # disallow gaps in matching field types in case 1 - } - } - - # Modify the current state if we are successful. - # We must exactly reach the ends of the new list for success, and the old - # pattern must have more fields. Here is an example where the first and - # second lines have the same number, and we should not align: - # my @a = map chr, 0 .. 255; - # my @b = grep /\W/, @a; - # my @c = grep /[^\w]/, @a; - - # Otherwise, we would get all of the commas aligned, which doesn't work as - # well: - # my @a = map chr, 0 .. 255; - # my @b = grep /\W/, @a; - # my @c = grep /[^\w]/, @a; - - if ( ( $j == $jmax ) - && ( $current_field eq '' ) - && ( $case != 1 || $hid_equals ) - && ( $maximum_field_index > $jmax ) ) - { - my $k = $maximum_field_index; - $current_field .= $old_rfields->[$k]; - $current_pattern .= $old_rpatterns->[$k]; - $new_fields[$j] = $current_field; - $new_matching_patterns[$j] = $current_pattern; - - $new_alignments[$j] = $old_line->get_alignment($k); - $maximum_field_index = $j; - - $old_line->set_alignments(@new_alignments); - $old_line->set_jmax($jmax); - $old_line->set_rtokens( \@new_matching_tokens ); - $old_line->set_rfields( \@new_fields ); - $old_line->set_rpatterns( \@{$rpatterns} ); - } - - # Dumb Down starting match if necessary: - # - # Consider the following two lines: - # - # { - # $a => 20 > 3 ? 1 : 0, - # $xyz => 5, - # } - - # We would like to get alignment regardless of the order of the two lines. - # If the lines come in in this order, then we will simplify the patterns of - # the first line in sub eliminate_new_fields. If the lines come in reverse - # order, then we achieve this with eliminate_new_fields. - - # This update is currently restricted to leading '=>' matches. Although we - # could do this for both '=' and '=>', overall the results for '=' come out - # better without this step because this step can eliminate some other good - # matches. For example, with the '=' we get: - -# my @disilva = ( "di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva" ); -# my @dsf = map "$_\x{FFFE}Fred", @disilva; -# my @dsj = map "$_\x{FFFE}John", @disilva; -# my @dsJ = map "$_ John", @disilva; - - # without including '=' we get: - -# my @disilva = ( "di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva" ); -# my @dsf = map "$_\x{FFFE}Fred", @disilva; -# my @dsj = map "$_\x{FFFE}John", @disilva; -# my @dsJ = map "$_ John", @disilva; - elsif ( - $case == 2 - - && @new_matching_tokens == 1 - ##&& $new_matching_tokens[0] =~ /^=/ # see note above - && $new_matching_tokens[0] =~ /^=>/ - && $maximum_field_index > 2 - ) - { - my $jmaxm = $jmax - 1; - my $kmaxm = $maximum_field_index - 1; - my $have_side_comment = $old_rtokens->[$kmaxm] eq '#'; - - # We need to reduce the group pattern to be just two tokens, - # the leading equality or => and the final side comment - - my $mid_field = join "", - @{$old_rfields}[ 1 .. $maximum_field_index - 1 ]; - my $mid_patterns = join "", - @{$old_rpatterns}[ 1 .. $maximum_field_index - 1 ]; - my @new_alignments = ( - $old_line->get_alignment(0), - $old_line->get_alignment( $maximum_field_index - 1 ) - ); - my @new_tokens = - ( $old_rtokens->[0], $old_rtokens->[ $maximum_field_index - 1 ] ); - my @new_fields = ( - $old_rfields->[0], $mid_field, $old_rfields->[$maximum_field_index] - ); - my @new_patterns = ( - $old_rpatterns->[0], $mid_patterns, - $old_rpatterns->[$maximum_field_index] - ); +sub make_side_comment { - $maximum_field_index = 2; - $old_line->set_jmax($maximum_field_index); - $old_line->set_rtokens( \@new_tokens ); - $old_line->set_rfields( \@new_fields ); - $old_line->set_rpatterns( \@new_patterns ); + # create an empty side comment if none exists - initialize_for_new_group(); - add_to_group($old_line); - } - return; -} + my ( $self, $rtokens, $rfields, $rpatterns, $rfield_lengths ) = @_; -# create an empty side comment if none exists -sub make_side_comment { - my ( $new_line, $level_end ) = @_; - my $jmax = $new_line->get_jmax(); - my $rtokens = $new_line->get_rtokens(); + my $jmax = @{$rfields} - 1; # if line does not have a side comment... if ( ( $jmax == 0 ) || ( $rtokens->[ $jmax - 1 ] ne '#' ) ) { - my $rfields = $new_line->get_rfields(); - my $rpatterns = $new_line->get_rpatterns(); - $rtokens->[$jmax] = '#'; - $rfields->[ ++$jmax ] = ''; - $rpatterns->[$jmax] = '#'; - $new_line->set_jmax($jmax); - $new_line->set_jmax_original_line($jmax); - } - - # line has a side comment.. - else { - - # don't remember old side comment location for very long - my $line_number = $vertical_aligner_self->get_output_line_number(); - my $rfields = $new_line->get_rfields(); - if ( - $line_number - $last_side_comment_line_number > 12 - - # and don't remember comment location across block level changes - || ( $level_end < $last_side_comment_level - && $rfields->[0] =~ /^}/ ) - ) - { - forget_side_comment(); - } - $last_side_comment_line_number = $line_number; - $last_side_comment_level = $level_end; - } - return; -} - -sub decide_if_list { - - my $line = shift; - - # A list will be taken to be a line with a forced break in which all - # of the field separators are commas or comma-arrows (except for the - # trailing #) - - # List separator tokens are things like ',3' or '=>2', - # where the trailing digit is the nesting depth. Allow braces - # to allow nested list items. - my $rtokens = $line->get_rtokens(); - my $test_token = $rtokens->[0]; - if ( $test_token =~ /^(\,|=>)/ ) { - my $list_type = $test_token; - my $jmax = $line->get_jmax(); - - foreach ( 1 .. $jmax - 2 ) { - if ( $rtokens->[$_] !~ /^(\,|=>|\{)/ ) { - $list_type = ""; - last; - } - } - $line->set_list_type($list_type); + $jmax += 1; + $rtokens->[ $jmax - 1 ] = '#'; + $rfields->[$jmax] = ''; + $rfield_lengths->[$jmax] = 0; + $rpatterns->[$jmax] = '#'; } return; } -sub eliminate_new_fields { - - my ( $new_line, $old_line ) = @_; - return unless (@group_lines); - my $jmax = $new_line->get_jmax(); - - my $old_rtokens = $old_line->get_rtokens(); - my $rtokens = $new_line->get_rtokens(); - my $is_assignment = - ( $rtokens->[0] =~ /^=>?\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) ); - - # must be monotonic variation - return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax ); +{ ## closure for sub decide_if_list - # must be more fields in the new line - my $maximum_field_index = $old_line->get_jmax(); - return unless ( $maximum_field_index < $jmax ); + my %is_comma_token; - unless ($is_assignment) { - return - unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen ) - ; # only if monotonic + BEGIN { - # never combine fields of a comma list - return - unless ( $maximum_field_index > 1 ) - && ( $new_line->get_list_type() !~ /^,/ ); + my @q = qw( => ); + push @q, ','; + @is_comma_token{@q} = (1) x scalar(@q); } - my $rfields = $new_line->get_rfields(); - my $rpatterns = $new_line->get_rpatterns(); - my $old_rpatterns = $old_line->get_rpatterns(); + sub decide_if_list { - # loop over all OLD tokens except comment and check match - my $match = 1; - foreach my $k ( 0 .. $maximum_field_index - 2 ) { - if ( ( $old_rtokens->[$k] ne $rtokens->[$k] ) - || ( $old_rpatterns->[$k] ne $rpatterns->[$k] ) ) - { - $match = 0; - last; - } - } + my $line = shift; - # first tokens agree, so combine extra new tokens - if ($match) { - foreach my $k ( $maximum_field_index .. $jmax - 1 ) { + # A list will be taken to be a line with a forced break in which all + # of the field separators are commas or comma-arrows (except for the + # trailing #) - $rfields->[ $maximum_field_index - 1 ] .= $rfields->[$k]; - $rfields->[$k] = ""; - $rpatterns->[ $maximum_field_index - 1 ] .= $rpatterns->[$k]; - $rpatterns->[$k] = ""; + my $rtokens = $line->get_rtokens(); + my $test_token = $rtokens->[0]; + my ( $raw_tok, $lev, $tag, $tok_count ) = + decode_alignment_token($test_token); + if ( $is_comma_token{$raw_tok} ) { + my $list_type = $test_token; + my $jmax = $line->get_jmax(); + + foreach ( 1 .. $jmax - 2 ) { + ( $raw_tok, $lev, $tag, $tok_count ) = + decode_alignment_token( $rtokens->[$_] ); + if ( !$is_comma_token{$raw_tok} ) { + $list_type = ""; + last; + } + } + $line->set_list_type($list_type); } - - $rtokens->[ $maximum_field_index - 1 ] = '#'; - $rfields->[$maximum_field_index] = $rfields->[$jmax]; - $rpatterns->[$maximum_field_index] = $rpatterns->[$jmax]; - $jmax = $maximum_field_index; + return; } - $new_line->set_jmax($jmax); - return; } sub fix_terminal_ternary { @@ -1028,18 +829,14 @@ sub fix_terminal_ternary { # : $year % 400 ? 0 # : 1; # - # returns 1 if the terminal item should be indented + # returns the index of the terminal question token, if any - my ( $old_line, $rfields, $rtokens, $rpatterns ) = @_; - return unless ($old_line); + my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths, + $group_level ) + = @_; -## FUTURE CODING -## my ( $old_line, $end_line ) = @_; -## return unless ( $old_line && $end_line ); -## -## my $rfields = $end_line->get_rfields(); -## my $rpatterns = $end_line->get_rpatterns(); -## my $rtokens = $end_line->get_rtokens(); + return unless ($old_line); + use constant EXPLAIN_TERNARY => 0; my $jmax = @{$rfields} - 1; my $rfields_old = $old_line->get_rfields(); @@ -1051,18 +848,21 @@ sub fix_terminal_ternary { # look for the question mark after the : my ($jquestion); my $depth_question; - my $pad = ""; + my $pad = ""; + my $pad_length = 0; foreach my $j ( 0 .. $maximum_field_index - 1 ) { my $tok = $rtokens_old->[$j]; - if ( $tok =~ /^\?(\d+)$/ ) { - $depth_question = $1; + my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok); + if ( $raw_tok eq '?' ) { + $depth_question = $lev; # depth must be correct next unless ( $depth_question eq $group_level ); $jquestion = $j; if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) { - $pad = " " x length($1); + $pad_length = length($1); + $pad = " " x $pad_length; } else { return; # shouldn't happen @@ -1079,11 +879,12 @@ sub fix_terminal_ternary { # Work on copies of the actual arrays in case we have # to return due to an error - my @fields = @{$rfields}; - my @patterns = @{$rpatterns}; - my @tokens = @{$rtokens}; + my @fields = @{$rfields}; + my @patterns = @{$rpatterns}; + my @tokens = @{$rtokens}; + my @field_lengths = @{$rfield_lengths}; - VALIGN_DEBUG_FLAG_TERNARY && do { + EXPLAIN_TERNARY && do { local $" = '><'; print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n"; print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n"; @@ -1109,8 +910,12 @@ sub fix_terminal_ternary { # Note that this padding will remain even if the terminal value goes # out on a separate line. This does not seem to look to bad, so no # mechanism has been included to undo it. - my $field1 = shift @fields; + my $field1 = shift @fields; + my $field_length1 = shift @field_lengths; + my $len_colon = length($colon); unshift @fields, ( $colon, $pad . $therest ); + unshift @field_lengths, + ( $len_colon, $pad_length + $field_length1 - $len_colon ); # change the leading pattern from : to ? return unless ( $patterns[0] =~ s/^\:/?/ ); @@ -1120,7 +925,8 @@ sub fix_terminal_ternary { unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] ); # insert appropriate number of empty fields - splice( @fields, 1, 0, ('') x $jadd ) if $jadd; + splice( @fields, 1, 0, ('') x $jadd ) if $jadd; + splice( @field_lengths, 1, 0, (0) x $jadd ) if $jadd; } # handle sub-case of first field just equal to leading colon. @@ -1136,13 +942,15 @@ sub fix_terminal_ternary { $patterns[1] = "?b" . $patterns[1]; # pad the second field - $fields[1] = $pad . $fields[1]; + $fields[1] = $pad . $fields[1]; + $field_lengths[1] = $pad_length + $field_lengths[1]; # install leading tokens and patterns of existing line, replacing # leading token and inserting appropriate number of empty fields splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] ); splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] ); - splice( @fields, 1, 0, ('') x $jadd ) if $jadd; + splice( @fields, 1, 0, ('') x $jadd ) if $jadd; + splice( @field_lengths, 1, 0, (0) x $jadd ) if $jadd; } } @@ -1158,12 +966,14 @@ sub fix_terminal_ternary { unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] ); # insert appropriate number of empty fields - $jadd = $jquestion + 1; - $fields[0] = $pad . $fields[0]; - splice( @fields, 0, 0, ('') x $jadd ) if $jadd; + $jadd = $jquestion + 1; + $fields[0] = $pad . $fields[0]; + $field_lengths[0] = $pad_length + $field_lengths[0]; + splice( @fields, 0, 0, ('') x $jadd ) if $jadd; + splice( @field_lengths, 0, 0, (0) x $jadd ) if $jadd; } - VALIGN_DEBUG_FLAG_TERNARY && do { + EXPLAIN_TERNARY && do { local $" = '><'; print STDOUT "MODIFIED TOKENS=<@tokens>\n"; print STDOUT "MODIFIED PATTERNS=<@patterns>\n"; @@ -1171,13 +981,10 @@ sub fix_terminal_ternary { }; # all ok .. update the arrays - @{$rfields} = @fields; - @{$rtokens} = @tokens; - @{$rpatterns} = @patterns; -## FUTURE CODING -## $end_line->set_rfields( \@fields ); -## $end_line->set_rtokens( \@tokens ); -## $end_line->set_rpatterns( \@patterns ); + @{$rfields} = @fields; + @{$rtokens} = @tokens; + @{$rpatterns} = @patterns; + @{$rfield_lengths} = @field_lengths; # force a flush after this line return $jquestion; @@ -1194,7 +1001,8 @@ sub fix_terminal_else { # # returns a positive value if the else block should be indented # - my ( $old_line, $rfields, $rtokens, $rpatterns ) = @_; + my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths ) = @_; + return unless ($old_line); my $jmax = @{$rfields} - 1; return unless ( $jmax > 0 ); @@ -1240,608 +1048,214 @@ sub fix_terminal_else { my $jadd = $jbrace - $jparen; splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] ); splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] ); - splice( @{$rfields}, 1, 0, ('') x $jadd ); + splice( @{$rfields}, 1, 0, ('') x $jadd ); + splice( @{$rfield_lengths}, 1, 0, (0) x $jadd ); # force a flush after this line if it does not follow a case if ( $rfields_old->[0] =~ /^case\s*$/ ) { return } else { return $jbrace } } -{ # sub check_match - my %is_good_alignment; +my %is_closing_block_type; - BEGIN { +BEGIN { + @_ = qw< } ] >; + @is_closing_block_type{@_} = (1) x scalar(@_); +} - # Vertically aligning on certain "good" tokens is usually okay - # so we can be less restrictive in marginal cases. - my @q = qw( { ? => = ); - push @q, (','); - @is_good_alignment{@q} = (1) x scalar(@q); - } +sub check_match { - sub check_match { + # See if the current line matches the current vertical alignment group. - # See if the current line matches the current vertical alignment group. - # If not, flush the current group. - my ( $new_line, $old_line ) = @_; + my ( $self, $new_line, $base_line, $prev_line ) = @_; - # uses global variables: - # $previous_minimum_jmax_seen - # $maximum_jmax_seen - # $marginal_match - my $jmax = $new_line->get_jmax(); - my $maximum_field_index = $old_line->get_jmax(); + # Given: + # $new_line = the line being considered for group inclusion + # $base_line = the first line of the current group + # $prev_line = the line just before $new_line - # flush if this line has too many fields - # variable $GoToLoc indicates goto branch point, for debugging - my $GoToLoc = 1; - if ( $jmax > $maximum_field_index ) { goto NO_MATCH } + # returns a flag and a value as follows: + # return (0, $imax_align) if the line does not match + # return (1, $imax_align) if the line matches but does not fit + # return (2, $imax_align) if the line matches and fits - # flush if adding this line would make a non-monotonic field count - if ( - ( $maximum_field_index > $jmax ) # this has too few fields - && ( - ( $previous_minimum_jmax_seen < - $jmax ) # and wouldn't be monotonic - || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen ) - ) - ) - { - $GoToLoc = 2; - goto NO_MATCH; - } + # Returns '$imax_align' which is the index of the maximum matching token. + # It will be used in the subsequent left-to-right sweep to align as many + # tokens as possible for lines which partially match. + my $imax_align = -1; - # otherwise see if this line matches the current group - my $jmax_original_line = $new_line->get_jmax_original_line(); - my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); - my $rtokens = $new_line->get_rtokens(); - my $rfields = $new_line->get_rfields(); - my $rpatterns = $new_line->get_rpatterns(); - my $list_type = $new_line->get_list_type(); - - my $group_list_type = $old_line->get_list_type(); - my $old_rpatterns = $old_line->get_rpatterns(); - my $old_rtokens = $old_line->get_rtokens(); - - my $jlimit = $jmax - 1; - if ( $maximum_field_index > $jmax ) { - $jlimit = $jmax_original_line; - --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) ); - } - - # handle comma-separated lists .. - if ( $group_list_type && ( $list_type eq $group_list_type ) ) { - for my $j ( 0 .. $jlimit ) { - my $old_tok = $old_rtokens->[$j]; - next unless $old_tok; - my $new_tok = $rtokens->[$j]; - next unless $new_tok; - - # lists always match ... - # unless they would align any '=>'s with ','s - $GoToLoc = 3; - goto NO_MATCH - if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/ - || $new_tok =~ /^=>/ && $old_tok =~ /^,/ ); - } - } - - # do detailed check for everything else except hanging side comments - elsif ( !$is_hanging_side_comment ) { - - my $leading_space_count = $new_line->get_leading_space_count(); - - my $max_pad = 0; - my $min_pad = 0; - my $saw_good_alignment; - - for my $j ( 0 .. $jlimit ) { - - my $old_tok = $old_rtokens->[$j]; - my $new_tok = $rtokens->[$j]; - - # Note on encoding used for alignment tokens: - # ------------------------------------------- - # Tokens are "decorated" with information which can help - # prevent unwanted alignments. Consider for example the - # following two lines: - # local ( $xn, $xd ) = split( '/', &'rnorm(@_) ); - # local ( $i, $f ) = &'bdiv( $xn, $xd ); - # There are three alignment tokens in each line, a comma, - # an =, and a comma. In the first line these three tokens - # are encoded as: - # ,4+local-18 =3 ,4+split-7 - # and in the second line they are encoded as - # ,4+local-18 =3 ,4+&'bdiv-8 - # Tokens always at least have token name and nesting - # depth. So in this example the ='s are at depth 3 and - # the ,'s are at depth 4. This prevents aligning tokens - # of different depths. Commas contain additional - # information, as follows: - # , {depth} + {container name} - {spaces to opening paren} - # This allows us to reject matching the rightmost commas - # in the above two lines, since they are for different - # function calls. This encoding is done in - # 'sub send_lines_to_vertical_aligner'. - - # Pick off actual token. - # Everything up to the first digit is the actual token. - my $alignment_token = $new_tok; - if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 } - - # see if the decorated tokens match - my $tokens_match = $new_tok eq $old_tok - - # Exception for matching terminal : of ternary statement.. - # consider containers prefixed by ? and : a match - || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ ); - - # No match if the alignment tokens differ... - if ( !$tokens_match ) { - - # ...Unless this is a side comment - if ( - $j == $jlimit - - # and there is either at least one alignment token - # or this is a single item following a list. This - # latter rule is required for 'December' to join - # the following list: - # my (@months) = ( - # '', 'January', 'February', 'March', - # 'April', 'May', 'June', 'July', - # 'August', 'September', 'October', 'November', - # 'December' - # ); - # If it doesn't then the -lp formatting will fail. - && ( $j > 0 || $old_tok =~ /^,/ ) - ) - { - $marginal_match = 1 - if ( $marginal_match == 0 - && @group_lines == 1 ); - last; - } + # variable $GoToMsg explains reason for no match, for debugging + my $GoToMsg = ""; + use constant EXPLAIN_CHECK_MATCH => 0; - $GoToLoc = 4; - goto NO_MATCH; - } + # This is a flag for testing alignment by sub sweep_left_to_right only. + # This test can help find problems with the alignment logic. + # This flag should normally be zero. + use constant TEST_SWEEP_ONLY => 0; - # Calculate amount of padding required to fit this in. - # $pad is the number of spaces by which we must increase - # the current field to squeeze in this field. - my $pad = - length( $rfields->[$j] ) - $old_line->current_field_width($j); - if ( $j == 0 ) { $pad += $leading_space_count; } - - # remember max pads to limit marginal cases - if ( $alignment_token ne '#' ) { - if ( $pad > $max_pad ) { $max_pad = $pad } - if ( $pad < $min_pad ) { $min_pad = $pad } - } - if ( $is_good_alignment{$alignment_token} ) { - $saw_good_alignment = 1; - } + my $jmax = $new_line->get_jmax(); + my $maximum_field_index = $base_line->get_jmax(); - # If patterns don't match, we have to be careful... - if ( $old_rpatterns->[$j] ne $rpatterns->[$j] ) { - - # flag this as a marginal match since patterns differ - $marginal_match = 1 - if ( $marginal_match == 0 && @group_lines == 1 ); - - # We have to be very careful about aligning commas - # when the pattern's don't match, because it can be - # worse to create an alignment where none is needed - # than to omit one. Here's an example where the ','s - # are not in named containers. The first line below - # should not match the next two: - # ( $a, $b ) = ( $b, $r ); - # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 ); - # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 ); - if ( $alignment_token eq ',' ) { - - # do not align commas unless they are in named containers - $GoToLoc = 5; - goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ ); - } + my $jlimit = $jmax - 2; + if ( $jmax > $maximum_field_index ) { + $jlimit = $maximum_field_index - 2; + } - # do not align parens unless patterns match; - # large ugly spaces can occur in math expressions. - elsif ( $alignment_token eq '(' ) { + if ( $new_line->get_is_hanging_side_comment() ) { - # But we can allow a match if the parens don't - # require any padding. - $GoToLoc = 6; - if ( $pad != 0 ) { goto NO_MATCH } - } + # HSC's can join the group if they fit + } - # Handle an '=' alignment with different patterns to - # the left. - elsif ( $alignment_token eq '=' ) { - - # It is best to be a little restrictive when - # aligning '=' tokens. Here is an example of - # two lines that we will not align: - # my $variable=6; - # $bb=4; - # The problem is that one is a 'my' declaration, - # and the other isn't, so they're not very similar. - # We will filter these out by comparing the first - # letter of the pattern. This is crude, but works - # well enough. - if ( - substr( $old_rpatterns->[$j], 0, 1 ) ne - substr( $rpatterns->[$j], 0, 1 ) ) - { - $GoToLoc = 7; - goto NO_MATCH; - } + # Everything else + else { - # If we pass that test, we'll call it a marginal match. - # Here is an example of a marginal match: - # $done{$$op} = 1; - # $op = compile_bblock($op); - # The left tokens are both identifiers, but - # one accesses a hash and the other doesn't. - # We'll let this be a tentative match and undo - # it later if we don't find more than 2 lines - # in the group. - elsif ( @group_lines == 1 ) { - $marginal_match = - 2; # =2 prevents being undone below - } - } - } + # A group with hanging side comments ends with the first non hanging + # side comment. + if ( $base_line->get_is_hanging_side_comment() ) { + $GoToMsg = "end of hanging side comments"; + goto NO_MATCH; + } - # Don't let line with fewer fields increase column widths - # ( align3.t ) - if ( $maximum_field_index > $jmax ) { + # The number of tokens that this line shares with the previous line + # has been stored with the previous line. This value was calculated + # and stored by sub 'match_line_pair'. + $imax_align = $prev_line->get_imax_pair(); - # Exception: suspend this rule to allow last lines to join - $GoToLoc = 8; - if ( $pad > 0 ) { goto NO_MATCH; } - } - } ## end for my $j ( 0 .. $jlimit) - - # Turn off the "marginal match" flag in some cases... - # A "marginal match" occurs when the alignment tokens agree - # but there are differences in the other tokens (patterns). - # If we leave the marginal match flag set, then the rule is that we - # will align only if there are more than two lines in the group. - # We will turn of the flag if we almost have a match - # and either we have seen a good alignment token or we - # just need a small pad (2 spaces) to fit. These rules are - # the result of experimentation. Tokens which misaligned by just - # one or two characters are annoying. On the other hand, - # large gaps to less important alignment tokens are also annoying. - if ( $marginal_match == 1 - && $jmax == $maximum_field_index - && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) ) - ) - { - $marginal_match = 0; - } - ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n"; + if ( $imax_align != $jlimit ) { + $GoToMsg = "Not all tokens match: $imax_align != $jlimit\n"; + goto NO_MATCH; } - # We have a match (even if marginal). - # If the current line has fewer fields than the current group - # but otherwise matches, copy the remaining group fields to - # make it a perfect match. - if ( $maximum_field_index > $jmax ) { + } + + # The tokens match, but the lines must have identical number of + # tokens to join the group. + if ( $maximum_field_index != $jmax ) { + $GoToMsg = "token count differs"; + goto NO_MATCH; + } - ########################################################## - # FIXME: The previous version had a bug which made side comments - # become regular fields, so for now the program does not allow a - # line with side comment to match. This should eventually be done. - # The best test file for experimenting is 'lista.t' - ########################################################## + # The tokens match. Now See if there is space for this line in the + # current group. + if ( $self->check_fit( $new_line, $base_line ) && !TEST_SWEEP_ONLY ) { - my $comment = $rfields->[$jmax]; - $GoToLoc = 9; - goto NO_MATCH if ($comment); + EXPLAIN_CHECK_MATCH + && print "match and fit, imax_align=$imax_align, jmax=$jmax\n"; + return ( 2, $jlimit ); + } + else { - # Corrected loop - for my $jj ( $jlimit .. $maximum_field_index ) { - $rtokens->[$jj] = $old_rtokens->[$jj]; - $rfields->[ $jj + 1 ] = ''; - $rpatterns->[ $jj + 1 ] = $old_rpatterns->[ $jj + 1 ]; - } + EXPLAIN_CHECK_MATCH + && print "match but no fit, imax_align=$imax_align, jmax=$jmax\n"; + return ( 1, $jlimit ); + } -## THESE DO NOT GIVE CORRECT RESULTS -## $rfields->[$jmax] = $comment; -## $new_line->set_jmax($jmax); + NO_MATCH: - } - return; + EXPLAIN_CHECK_MATCH + && print + "no match because $GoToMsg, max match index =i $imax_align, jmax=$jmax\n"; - NO_MATCH: + return ( 0, $imax_align ); +} - # variable $GoToLoc is for debugging - #print "no match from $GoToLoc\n"; +sub check_fit { - # Make one last effort to retain a match of certain statements - my $match = salvage_equality_matches( $new_line, $old_line ); - my_flush_code() unless ($match); + my ( $self, $new_line, $old_line ) = @_; + + # The new line has alignments identical to the current group. Now we have + # to fit the new line into the group without causing a field to exceed the + # line length limit. + # return true if successful + # return false if not successful + + my $jmax = $new_line->get_jmax(); + my $leading_space_count = $new_line->get_leading_space_count(); + my $rfield_lengths = $new_line->get_rfield_lengths(); + my $padding_available = $old_line->get_available_space_on_right(); + my $jmax_old = $old_line->get_jmax(); + + # Safety check ... only lines with equal array sizes should arrive here + # from sub check_match. So if this error occurs, look at recent changes in + # sub check_match. It is only supposed to check the fit of lines with + # identical numbers of alignment tokens. + if ( $jmax_old ne $jmax ) { + + $self->warning(<get_alignments(); + foreach my $alignment (@alignments) { + $alignment->save_column(); + } - # Reduce the complexity of the two lines if it will allow us to retain - # alignment of some common alignments, including '=' and '=>'. We will - # convert both lines to have just two matching tokens, the equality and the - # side comment. - - # return 0 or undef if unsuccessful - # return 1 if successful - - # Here is a very simple example of two lines where we could at least - # align the equals: - # $x = $class->_sub( $x, $delta ); - # $xpownm1 = $class->_pow( $class->_copy($x), $nm1 ); # x(i)^(n-1) - - # We will only do this if there is one old line (and one new line) - return unless ( @group_lines == 1 ); - return if ($is_matching_terminal_line); - - # We are only looking for equality type statements - my $old_rtokens = $old_line->get_rtokens(); - my $rtokens = $new_line->get_rtokens(); - my $is_equals = - ( $rtokens->[0] =~ /=/ && ( $old_rtokens->[0] eq $rtokens->[0] ) ); - return unless ($is_equals); - - # The leading patterns must match - my $old_rpatterns = $old_line->get_rpatterns(); - my $rpatterns = $new_line->get_rpatterns(); - return if ( $old_rpatterns->[0] ne $rpatterns->[0] ); - - # Both should have side comment fields (should always be true) - my $jmax_old = $old_line->get_jmax(); - my $jmax_new = $new_line->get_jmax(); - my $end_tok_old = $old_rtokens->[ $jmax_old - 1 ]; - my $end_tok_new = $rtokens->[ $jmax_new - 1 ]; - my $have_side_comments = - defined($end_tok_old) - && $end_tok_old eq '#' - && defined($end_tok_new) - && $end_tok_new eq '#'; - if ( !$have_side_comments ) { return; } - - # Do not match if any remaining tokens in new line include '?', 'if', - # 'unless','||', '&&'. The reason is that (1) this isn't a great match, and - # (2) we will prevent possibly better matchs to follow. Here is an - # example. The match of the first two lines is rejected, and this allows - # the second and third lines to match. - # my $type = shift || "o"; - # my $fname = ( $type eq 'oo' ? 'orte_city' : 'orte' ); - # my $suffix = ( $coord_system eq 'standard' ? '' : '-orig' ); - # This logic can cause some unwanted losses of alignments, but it can retain - # long runs of multiple-token alignments, so overall it is worthwhile. - # If we had a peek at the subsequent line we could make a much better - # decision here, but for now this is not available. - for ( my $j = 1 ; $j < $jmax_new - 1 ; $j++ ) { - my $new_tok = $rtokens->[$j]; - - # git#16: do not consider fat commas as good aligmnents here - my $is_good_alignment = - ( $new_tok =~ /^(=|\?|if|unless|\|\||\&\&)/ && $new_tok !~ /^=>/ ); - return if ($is_good_alignment); - } - - my $squeeze_line = sub { - my ($line_obj) = @_; - - # reduce a line down to the three fields surrounding - # the two tokens, an '=' of some sort and a '#' at the end - - my $jmax = $line_obj->get_jmax(); - my $jmax_new = 2; - return unless $jmax > $jmax_new; - my $rfields = $line_obj->get_rfields(); - my $rpatterns = $line_obj->get_rpatterns(); - my $rtokens = $line_obj->get_rtokens(); - my $rfields_new = [ - $rfields->[0], join( '', @{$rfields}[ 1 .. $jmax - 1 ] ), - $rfields->[$jmax] - ]; - my $rpatterns_new = [ - $rpatterns->[0], join( '', @{$rpatterns}[ 1 .. $jmax - 1 ] ), - $rpatterns->[$jmax] - ]; - my $rtokens_new = [ $rtokens->[0], $rtokens->[ $jmax - 1 ] ]; - $line_obj->{_rfields} = $rfields_new; - $line_obj->{_rpatterns} = $rpatterns_new; - $line_obj->{_rtokens} = $rtokens_new; - $line_obj->set_jmax($jmax_new); - }; - - # Okay, we will force a match at the equals-like token. We will fix both - # lines to have just 2 tokens and 3 fields: - $squeeze_line->($new_line); - $squeeze_line->($old_line); - - # start over with a new group - initialize_for_new_group(); - add_to_group($old_line); - return 1; -} - -sub check_fit { - - my ( $new_line, $old_line ) = @_; - return unless (@group_lines); - - my $jmax = $new_line->get_jmax(); - my $leading_space_count = $new_line->get_leading_space_count(); - my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); - my $rtokens = $new_line->get_rtokens(); - my $rfields = $new_line->get_rfields(); - my $rpatterns = $new_line->get_rpatterns(); - - my $group_list_type = $group_lines[0]->get_list_type(); - - my $padding_so_far = 0; - my $padding_available = $old_line->get_available_space_on_right(); - - # save current columns in case this doesn't work - save_alignment_columns(); + my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); + # Loop over all alignments ... my $maximum_field_index = $old_line->get_jmax(); for my $j ( 0 .. $jmax ) { - my $pad = length( $rfields->[$j] ) - $old_line->current_field_width($j); + my $pad = $rfield_lengths->[$j] - $old_line->current_field_width($j); if ( $j == 0 ) { $pad += $leading_space_count; } - # remember largest gap of the group, excluding gap to side comment - if ( $pad < 0 - && $group_maximum_gap < -$pad - && $j > 0 - && $j < $jmax - 1 ) - { - $group_maximum_gap = -$pad; - } - + # Keep going if this field does not need any space. next if $pad < 0; - ## OLD NOTES: - ## This patch helps sometimes, but it doesn't check to see if - ## the line is too long even without the side comment. It needs - ## to be reworked. - ##don't let a long token with no trailing side comment push - ##side comments out, or end a group. (sidecmt1.t) - ##next if ($j==$jmax-1 && length($rfields->[$jmax])==0); - - # BEGIN PATCH for keith1.txt. - # If the group began matching multiple tokens but later this got - # reduced to a fewer number of matching tokens, then the fields - # of the later lines will still have to fit into their corresponding - # fields. So a large later field will "push" the other fields to - # the right, including previous side comments, and if there is no room - # then there is no match. - # For example, look at the last line in the following snippet: - - # my $b_prod_db = ( $ENV{ORACLE_SID} =~ m/p$/ && !$testing ) ? true : false; - # my $env = ($b_prod_db) ? "prd" : "val"; - # my $plant = ( $OPT{p} ) ? $OPT{p} : "STL"; - # my $task = $OPT{t}; - # my $fnam = "longggggggggggggggg.$record_created.$env.$plant.idash"; - - # The long term will push the '?' to the right to fit in, and in this - # case there is not enough room so it will not match the equals unless - # we do something special. - - # Usually it looks good to keep an initial alignment of '=' going, and - # we can do this if the long term can fit in the space taken up by the - # remaining fields (the ? : fields here). - - # Allowing any matching token for now, but it could be restricted - # to an '='-like token if necessary. + # See if it needs too much space. + if ( $pad > $padding_available ) { - if ( - $pad > $padding_available - && $jmax == 2 # matching one thing (plus #) - && $j == $jmax - 1 # at last field - && @group_lines > 1 # more than 1 line in group now - && $jmax < $maximum_field_index # other lines have more fields - && length( $rfields->[$jmax] ) == 0 # no side comment - - # Uncomment to match only equals (but this does not seem necessary) - # && $rtokens->[0] =~ /^=\d/ # matching an equals - ) - { - my $extra_padding = 0; - foreach my $jj ( $j + 1 .. $maximum_field_index - 1 ) { - $extra_padding += $old_line->current_field_width($jj); + ################################################ + # Line does not fit -- revert to starting state + ################################################ + foreach my $alignment (@alignments) { + $alignment->restore_column(); } - - next if ( $pad <= $padding_available + $extra_padding ); - } - - # END PATCH for keith1.pl - - # This line will need space; lets see if we want to accept it.. - if ( - - # not if this won't fit - ( $pad > $padding_available ) - - # previously, there were upper bounds placed on padding here - # (maximum_whitespace_columns), but they were not really helpful - - ) - { - - # revert to starting state then flush; things didn't work out - restore_alignment_columns(); - my_flush_code(); - last; + return; } - # patch to avoid excessive gaps in previous lines, - # due to a line of fewer fields. - # return join( ".", - # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"}, - # $self->{"area"}, $self->{"id"}, $self->{"sel"} ); - next if ( $jmax < $maximum_field_index && $j == $jmax - 1 ); - - # looks ok, squeeze this field in + # make room for this field $old_line->increase_field_width( $j, $pad ); $padding_available -= $pad; - - # remember largest gap of the group, excluding gap to side comment - if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) { - $group_maximum_gap = $pad; - } } - return; -} - -sub add_to_group { - # The current line either starts a new alignment group or is - # accepted into the current alignment group. - my ($new_line) = @_; - push_group_line($new_line); + ###################################### + # The line fits, the match is accepted + ###################################### + return 1; - # initialize field lengths if starting new group - if ( @group_lines == 1 ) { +} - my $jmax = $new_line->get_jmax(); - my $rfields = $new_line->get_rfields(); - my $rtokens = $new_line->get_rtokens(); - my $col = $new_line->get_leading_space_count(); +sub install_new_alignments { - for my $j ( 0 .. $jmax ) { - $col += length( $rfields->[$j] ); + my ($new_line) = @_; - # create initial alignments for the new group - my $token = ""; - if ( $j < $jmax ) { $token = $rtokens->[$j] } - my $alignment = make_alignment( $col, $token ); - $new_line->set_alignment( $j, $alignment ); - } + my $jmax = $new_line->get_jmax(); + my $rfield_lengths = $new_line->get_rfield_lengths(); + my $col = $new_line->get_leading_space_count(); - $maximum_jmax_seen = $jmax; - $minimum_jmax_seen = $jmax; - } + for my $j ( 0 .. $jmax ) { + $col += $rfield_lengths->[$j]; - # use previous alignments otherwise - else { - my @new_alignments = $group_lines[-2]->get_alignments(); - $new_line->set_alignments(@new_alignments); + # create initial alignments for the new group + my $alignment = + Perl::Tidy::VerticalAligner::Alignment->new( { column => $col } ); + $new_line->set_alignment( $j, $alignment ); } + return; +} - # remember group jmax extremes for next call to valign_input - $previous_minimum_jmax_seen = $minimum_jmax_seen; - $previous_maximum_jmax_seen = $maximum_jmax_seen; +sub copy_old_alignments { + my ( $new_line, $old_line ) = @_; + my @new_alignments = $old_line->get_alignments(); + $new_line->set_alignments(@new_alignments); return; } @@ -1853,59 +1267,13 @@ sub dump_array { return; } -# flush() sends the current Perl::Tidy::VerticalAligner group down the -# pipeline to Perl::Tidy::FileWriter. - -# This is the external flush, which also empties the buffer and cache -sub flush { - - # the buffer must be emptied first, then any cached text - dump_valign_buffer(); - - if (@group_lines) { - my_flush(); - } - else { - if ($cached_line_type) { - $seqno_string = $cached_seqno_string; - valign_output_step_C( $cached_line_text, - $cached_line_leading_space_count, - $last_level_written ); - $cached_line_type = 0; - $cached_line_text = ""; - $cached_seqno_string = ""; - } - } - return; -} - -sub reduce_valign_buffer_indentation { - - my ($diff) = @_; - if ( $valign_buffer_filling && $diff ) { - my $max_valign_buffer = @valign_buffer; - foreach my $i ( 0 .. $max_valign_buffer - 1 ) { - my ( $line, $leading_space_count, $level ) = - @{ $valign_buffer[$i] }; - my $ws = substr( $line, 0, $diff ); - if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) { - $line = substr( $line, $diff ); - } - if ( $leading_space_count >= $diff ) { - $leading_space_count -= $diff; - $level = level_change( $leading_space_count, $diff, $level ); - } - $valign_buffer[$i] = [ $line, $leading_space_count, $level ]; - } - } - return; -} - sub level_change { # compute decrease in level when we remove $diff spaces from the # leading spaces - my ( $leading_space_count, $diff, $level ) = @_; + my ( $self, $leading_space_count, $diff, $level ) = @_; + + my $rOpts_indent_columns = $self->[_rOpts_indent_columns_]; if ($rOpts_indent_columns) { my $olev = int( ( $leading_space_count + $diff ) / $rOpts_indent_columns ); @@ -1916,32 +1284,30 @@ sub level_change { return $level; } -sub dump_valign_buffer { - if (@valign_buffer) { - foreach (@valign_buffer) { - valign_output_step_D( @{$_} ); - } - @valign_buffer = (); - } - $valign_buffer_filling = ""; - return; -} +############################################### +# CODE SECTION 4: Code to process comment lines +############################################### -sub my_flush_comment { +sub _flush_comment_lines { - # Output a group of COMMENT lines + # Output a group consisting of COMMENT lines - return unless (@group_lines); - my $leading_space_count = $comment_leading_space_count; - my $leading_string = get_leading_string($leading_space_count); + my ($self) = @_; + my $rgroup_lines = $self->[_rgroup_lines_]; + return unless ( @{$rgroup_lines} ); + my $group_level = $self->[_group_level_]; + my $leading_space_count = $self->[_comment_leading_space_count_]; + my $leading_string = + $self->get_leading_string( $leading_space_count, $group_level ); # look for excessively long lines my $max_excess = 0; - foreach my $str (@group_lines) { + foreach my $item ( @{$rgroup_lines} ) { + my ( $str, $str_len ) = @{$item}; my $excess = - length($str) + + $str_len + $leading_space_count - - maximum_line_length_for_level($group_level); + $self->maximum_line_length_for_level($group_level); if ( $excess > $max_excess ) { $max_excess = $excess; } @@ -1951,122 +1317,286 @@ sub my_flush_comment { if ( $max_excess > 0 ) { $leading_space_count -= $max_excess; if ( $leading_space_count < 0 ) { $leading_space_count = 0 } - $last_outdented_line_at = $file_writer_object->get_output_line_number(); + my $file_writer_object = $self->[_file_writer_object_]; + my $last_outdented_line_at = + $file_writer_object->get_output_line_number(); + $self->[_last_outdented_line_at_] = $last_outdented_line_at; + my $outdented_line_count = $self->[_outdented_line_count_]; unless ($outdented_line_count) { - $first_outdented_line_at = $last_outdented_line_at; + $self->[_first_outdented_line_at_] = $last_outdented_line_at; } - my $nlines = @group_lines; + my $nlines = @{$rgroup_lines}; $outdented_line_count += $nlines; + $self->[_outdented_line_count_] = $outdented_line_count; } # write the lines my $outdent_long_lines = 0; - foreach my $line (@group_lines) { - valign_output_step_B( $leading_space_count, $line, 0, - $outdent_long_lines, "", $group_level ); + + foreach my $item ( @{$rgroup_lines} ) { + my ( $str, $str_len, $Kend ) = @{$item}; + $self->valign_output_step_B( + { + leading_space_count => $leading_space_count, + line => $str, + line_length => $str_len, + side_comment_length => 0, + outdent_long_lines => $outdent_long_lines, + rvertical_tightness_flags => "", + level => $group_level, + level_end => $group_level, + Kend => $Kend, + } + ); } - initialize_for_new_group(); + $self->initialize_for_new_group(); return; } -sub my_flush_code { +###################################################### +# CODE SECTION 5: Code to process groups of code lines +###################################################### + +sub _flush_group_lines { + + # This is the vertical aligner internal flush, which leaves the cache + # intact + my ( $self, $level_jump ) = @_; - # Output a group of CODE lines + # $level_jump = $next_level-$group_level, if known + # = undef if not known - return unless (@group_lines); + my $rgroup_lines = $self->[_rgroup_lines_]; + return unless ( @{$rgroup_lines} ); + my $group_type = $self->[_group_type_]; + my $group_level = $self->[_group_level_]; - VALIGN_DEBUG_FLAG_APPEND0 - && do { - my $group_list_type = $group_lines[0]->get_list_type(); + # Debug + 0 && do { my ( $a, $b, $c ) = caller(); - my $nlines = @group_lines; - my $maximum_field_index = $group_lines[0]->get_jmax(); - my $rfields_old = $group_lines[0]->get_rfields(); - my $tok = $rfields_old->[0]; + my $nlines = @{$rgroup_lines}; print STDOUT -"APPEND0: my_flush_code called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$nlines extra=$extra_indent_ok first tok=$tok;\n"; +"APPEND0: _flush_group_lines called from $a $b $c lines=$nlines, type=$group_type \n"; + }; + + ############################################ + # Section 1: Handle a group of COMMENT lines + ############################################ + if ( $group_type eq 'COMMENT' ) { + $self->_flush_comment_lines(); + return; + } + + ######################################################################### + # Section 2: Handle line(s) of CODE. Most of the actual work of vertical + # aligning happens here in the following steps: + ######################################################################### + + # STEP 1: Remove most unmatched tokens. They block good alignments. + my ( $max_lev_diff, $saw_side_comment ) = + delete_unmatched_tokens( $rgroup_lines, $group_level ); + + # STEP 2: Sweep top to bottom, forming subgroups of lines with exactly + # matching common alignments. The indexes of these subgroups are in the + # return variable. + my $rgroups = $self->sweep_top_down( $rgroup_lines, $group_level ); + + # STEP 3: Sweep left to right through the lines, looking for leading + # alignment tokens shared by groups. + sweep_left_to_right( $rgroup_lines, $rgroups, $group_level ) + if ( @{$rgroups} > 1 ); + + # STEP 4: Move side comments to a common column if possible. + if ($saw_side_comment) { + $self->align_side_comments( $rgroup_lines, $rgroups ); + } - }; + # STEP 5: For the -lp option, increase the indentation of lists + # to the desired amount, but do not exceed the line length limit. - # some small groups are best left unaligned - my $do_not_align = decide_if_aligned_pair(); + # We are allowed to shift a group of lines to the right if: + # (1) its level is greater than the level of the previous group, and + # (2) its level is greater than the level of the next line to be written. - # optimize side comment location - $do_not_align = adjust_side_comment($do_not_align); + my $extra_indent_ok; + if ( $group_level > $self->[_last_level_written_] ) { - # recover spaces for -lp option if possible - my $extra_leading_spaces = get_extra_leading_spaces(); + # Use the level jump to next line to come, if given + if ( defined($level_jump) ) { + $extra_indent_ok = $level_jump < 0; + } + + # Otherwise, assume the next line has the level of the end of last line. + # This fixes case c008. + else { + my $level_end = $rgroup_lines->[-1]->get_level_end(); + $extra_indent_ok = $group_level > $level_end; + } + } - # all lines of this group have the same basic leading spacing - my $group_leader_length = $group_lines[0]->get_leading_space_count(); + my $extra_leading_spaces = + $extra_indent_ok + ? get_extra_leading_spaces( $rgroup_lines, $rgroups ) + : 0; - # add extra leading spaces if helpful - # NOTE: Use zero; this did not work well - my $min_ci_gap = 0; + # STEP 6: Output the lines. + # All lines in this batch have the same basic leading spacing: + my $group_leader_length = $rgroup_lines->[0]->get_leading_space_count(); - # output the lines - foreach my $line (@group_lines) { - valign_output_step_A( $line, $min_ci_gap, $do_not_align, - $group_leader_length, $extra_leading_spaces ); + foreach my $line ( @{$rgroup_lines} ) { + $self->valign_output_step_A( + { + line => $line, + min_ci_gap => 0, + do_not_align => 0, + group_leader_length => $group_leader_length, + extra_leading_spaces => $extra_leading_spaces, + level => $group_level, + } + ); } - initialize_for_new_group(); + $self->initialize_for_new_group(); return; } -sub my_flush { +{ ## closure for sub sweep_top_down - # This is the vertical aligner internal flush, which leaves the cache - # intact - return unless (@group_lines); + my $rall_lines; # all of the lines + my $grp_level; # level of all lines + my $rgroups; # describes the partition of lines we will make here + my $group_line_count; # number of lines in current partition - VALIGN_DEBUG_FLAG_APPEND0 && do { - my ( $a, $b, $c ) = caller(); - my $nlines = @group_lines; - print STDOUT -"APPEND0: my_flush called from $a $b $c lines=$nlines, type=$group_type \n"; - }; + BEGIN { $rgroups = [] } - # handle a group of COMMENT lines - if ( $group_type eq 'COMMENT' ) { my_flush_comment() } + sub initialize_for_new_rgroup { + $group_line_count = 0; + return; + } - # handle a single line of CODE - elsif ( @group_lines == 1 ) { my_flush_code() } + sub add_to_rgroup { - # handle group(s) of CODE lines - else { + my ($jend) = @_; + my $rline = $rall_lines->[$jend]; - # LP FIX PART 1 - # If we are trying to add extra indentation for -lp formatting, - # then we need to try to keep the group intact. But we have - # to set the $extra_indent_ok flag to zero in case some lines - # are output separately. We fix things up at the bottom. - # NOTE: this is a workaround but is tentative; we should really look to - # see if if extra indentation is possible. - my $rOpt_lp = $rOpts->{'line-up-parentheses'}; - my $keep_group_intact = $rOpt_lp && $extra_indent_ok; - my $extra_indent_ok_save = $extra_indent_ok; - $extra_indent_ok = 0; + my $jbeg = $jend; + if ( $group_line_count == 0 ) { + install_new_alignments($rline); + } + else { + my $rvals = pop @{$rgroups}; + $jbeg = $rvals->[0]; + copy_old_alignments( $rline, $rall_lines->[$jbeg] ); + } + push @{$rgroups}, [ $jbeg, $jend, undef ]; + $group_line_count++; + return; + } + + sub get_rgroup_jrange { + + return unless @{$rgroups}; + return unless ( $group_line_count > 0 ); + my ( $jbeg, $jend ) = @{ $rgroups->[-1] }; + return ( $jbeg, $jend ); + } - # we will rebuild alignment line group(s); - my @new_lines = @group_lines; - initialize_for_new_group(); + sub end_rgroup { + + my ($imax_align) = @_; + return unless @{$rgroups}; + return unless ( $group_line_count > 0 ); + + my ( $jbeg, $jend ) = @{ pop @{$rgroups} }; + push @{$rgroups}, [ $jbeg, $jend, $imax_align ]; + + # Undo some alignments of poor two-line combinations. + # We had to wait until now to know the line count. + if ( $jend - $jbeg == 1 ) { + my $line_0 = $rall_lines->[$jbeg]; + my $line_1 = $rall_lines->[$jend]; + + my $imax_pair = $line_1->get_imax_pair(); + if ( $imax_pair > $imax_align ) { $imax_align = $imax_pair } + + ## flag for possible future use: + ## my $is_isolated_pair = $imax_pair < 0 + ## && ( $jbeg == 0 + ## || $rall_lines->[ $jbeg - 1 ]->get_imax_pair() < 0 ); + + my $imax_prev = + $jbeg > 0 ? $rall_lines->[ $jbeg - 1 ]->get_imax_pair() : -1; + + my ( $is_marginal, $imax_align_fix ) = + is_marginal_match( $line_0, $line_1, $grp_level, $imax_align, + $imax_prev ); + if ($is_marginal) { + combine_fields( $line_0, $line_1, $imax_align_fix ); + } + } - # remove unmatched tokens in all lines - delete_unmatched_tokens( \@new_lines ); + initialize_for_new_rgroup(); + return; + } - foreach my $new_line (@new_lines) { + sub block_penultimate_match { - # Start a new group if necessary - if ( !@group_lines ) { - add_to_group($new_line); + # emergency reset to prevent sweep_left_to_right from trying to match a + # failed terminal else match + return unless @{$rgroups} > 1; + $rgroups->[-2]->[2] = -1; + return; + } + sub sweep_top_down { + my ( $self, $rlines, $group_level ) = @_; + + # Partition the set of lines into final alignment subgroups + # and store the alignments with the lines. + + # The alignment subgroups we are making here are groups of consecutive + # lines which have (1) identical alignment tokens and (2) do not + # exceed the allowable maximum line length. A later sweep from + # left-to-right ('sweep_lr') will handle additional alignments. + + # transfer args to closure variables + $rall_lines = $rlines; + $grp_level = $group_level; + $rgroups = []; + initialize_for_new_rgroup(); + return unless @{$rlines}; # shouldn't happen + + # Unset the _end_group flag for the last line if it it set because it + # is not needed and can causes problems for -lp formatting + $rall_lines->[-1]->set_end_group(0); + + # Loop over all lines ... + my $jline = -1; + foreach my $new_line ( @{$rall_lines} ) { + $jline++; + + # Start a new subgroup if necessary + if ( !$group_line_count ) { + add_to_rgroup($jline); + if ( $new_line->get_end_group() ) { + end_rgroup(-1); + } next; } my $j_terminal_match = $new_line->get_j_terminal_match(); - my $base_line = $group_lines[0]; + my ( $jbeg, $jend ) = get_rgroup_jrange(); + if ( !defined($jbeg) ) { + + # safety check, shouldn't happen + $self->warning(<[$jbeg]; # Initialize a global flag saying if the last line of the group # should match end of group and also terminate the group. There @@ -2079,16 +1609,31 @@ sub my_flush { $col_matching_terminal = $base_line->get_column($j_terminal_match); - # set global flag for sub decide_if_aligned_pair - $is_matching_terminal_line = 1; + # Ignore an undefined value as a defensive step; shouldn't + # normally happen. + $col_matching_terminal = 0 + unless defined($col_matching_terminal); } # ------------------------------------------------------------- - # Allow hanging side comment to join current group, if any. This - # will help keep side comments aligned, because otherwise we - # will have to start a new group, making alignment less likely. + # Allow hanging side comment to join current group, if any. The + # only advantage is to keep the other tokens in the same group. For + # example, this would make the '=' align here: + # $ax = 1; # side comment + # # hanging side comment + # $boondoggle = 5; # side comment + # $beetle = 5; # side comment + + # here is another example.. + + # _rtoc_name_count => {}, # hash to track .. + # _rpackage_stack => [], # stack to check .. + # # name changes + # _rlast_level => \$last_level, # brace indentation + # + # + # If this were not desired, the next step could be skipped. # ------------------------------------------------------------- - if ( $new_line->get_is_hanging_side_comment() ) { join_hanging_comment( $new_line, $base_line ); } @@ -2097,466 +1642,2190 @@ sub my_flush { # BEFORE this line unless both it and the previous line have side # comments. This prevents this line from pushing side coments out # to the right. - elsif ( $new_line->get_jmax() == 1 && !$keep_group_intact ) { + elsif ( $new_line->get_jmax() == 1 ) { # There are no matching tokens, so now check side comments. # Programming note: accessing arrays with index -1 is # risky in Perl, but we have verified there is at least one # line in the group and that there is at least one field. - my $prev_comment = $group_lines[-1]->get_rfields()->[-1]; + my $prev_comment = + $rall_lines->[ $jline - 1 ]->get_rfields()->[-1]; my $side_comment = $new_line->get_rfields()->[-1]; - my_flush_code() unless ( $side_comment && $prev_comment ); - + end_rgroup(-1) unless ( $side_comment && $prev_comment ); } - # ------------------------------------------------------------- - # If there is just one previous line, and it has more fields - # than the new line, try to join fields together to get a match - # with the new line. At the present time, only a single - # leading '=' is allowed to be compressed out. This is useful - # in rare cases where a table is forced to use old breakpoints - # because of side comments, - # and the table starts out something like this: - # my %MonthChars = ('0', 'Jan', # side comment - # '1', 'Feb', - # '2', 'Mar', - # Eliminating the '=' field will allow the remaining fields to - # line up. This situation does not occur if there are no side - # comments because scan_list would put a break after the - # opening '('. - # ------------------------------------------------------------- - - eliminate_old_fields( $new_line, $base_line ); - - # ------------------------------------------------------------- - # If the new line has more fields than the current group, - # see if we can match the first fields and combine the remaining - # fields of the new line. - # ------------------------------------------------------------- - - eliminate_new_fields( $new_line, $base_line ); - - # ------------------------------------------------------------- - # Flush previous group unless all common tokens and patterns - # match.. - - check_match( $new_line, $base_line ); - - # ------------------------------------------------------------- - # See if there is space for this line in the current group (if - # any) - # ------------------------------------------------------------- - if (@group_lines) { - check_fit( $new_line, $base_line ); + # See if the new line matches and fits the current group, + # if it still exists. Flush the current group if not. + my $match_code; + if ($group_line_count) { + ( $match_code, my $imax_align ) = + $self->check_match( $new_line, $base_line, + $rall_lines->[ $jline - 1 ] ); + if ( $match_code != 2 ) { end_rgroup($imax_align) } } - add_to_group($new_line); + # Store the new line + add_to_rgroup($jline); if ( defined($j_terminal_match) ) { - # if there is only one line in the group (maybe due to failure - # to match perfectly with previous lines), then align the ? or - # { of this terminal line with the previous one unless that - # would make the line too long - if ( @group_lines == 1 ) { - $base_line = $group_lines[0]; + # Decide if we should fix a terminal match. We can either: + # 1. fix it and prevent the sweep_lr from changing it, or + # 2. leave it alone and let sweep_lr try to fix it. + + # The current logic is to fix it if: + # -it has not joined to previous lines, + # -and either the previous subgroup has just 1 line, or + # -this line matched but did not fit (so sweep won't work) + my $fixit; + if ( $group_line_count == 1 ) { + $fixit ||= $match_code; + if ( !$fixit ) { + if ( @{$rgroups} > 1 ) { + my ( $jbegx, $jendx ) = @{ $rgroups->[-2] }; + my $nlines = $jendx - $jbegx + 1; + $fixit ||= $nlines <= 1; + } + } + } + + if ($fixit) { + $base_line = $new_line; my $col_now = $base_line->get_column($j_terminal_match); - my $pad = $col_matching_terminal - $col_now; + + # Ignore an undefined value as a defensive step; shouldn't + # normally happen. + $col_now = 0 unless defined($col_now); + + my $pad = $col_matching_terminal - $col_now; my $padding_available = $base_line->get_available_space_on_right(); - if ( $pad > 0 && $pad <= $padding_available ) { + if ( $col_now && $pad > 0 && $pad <= $padding_available ) { $base_line->increase_field_width( $j_terminal_match, $pad ); } + + # do not let sweep_left_to_right change an isolated 'else' + if ( !$new_line->get_is_terminal_ternary() ) { + block_penultimate_match(); + } } - my_flush_code(); - $is_matching_terminal_line = 0; + end_rgroup(-1); } - # Optional optimization; end the group if we know we cannot match - # next line. - elsif ( $new_line->{_end_group} ) { - my_flush_code(); + # end the group if we know we cannot match next line. + elsif ( $new_line->get_end_group() ) { + end_rgroup(-1); } - } + } ## end loop over lines - # LP FIX PART 2 - # if we managed to keep the group intact for -lp formatting, - # restore the flag which allows extra indentation - if ( $keep_group_intact && @group_lines == @new_lines ) { - $extra_indent_ok = $extra_indent_ok_save; - } - my_flush_code(); + end_rgroup(-1); + return ($rgroups); } - return; } -sub delete_selected_tokens { +sub two_line_pad { - my ( $line_obj, $ridel ) = @_; + my ( $line_m, $line, $imax_min ) = @_; - # remove an unused alignment token(s) to improve alignment chances - return unless ( defined($line_obj) && defined($ridel) && @{$ridel} ); + # Given: + # two isolated (list) lines + # imax_min = number of common alignment tokens + # Return: + # $pad_max = maximum suggested pad distnce + # = 0 if alignment not recommended + # Note that this is only for two lines which do not have alignment tokens + # in common with any other lines. It is intended for lists, but it might + # also be used for two non-list lines with a common leading '='. - my $jmax_old = $line_obj->get_jmax(); - my $rfields_old = $line_obj->get_rfields(); - my $rpatterns_old = $line_obj->get_rpatterns(); - my $rtokens_old = $line_obj->get_rtokens(); + # Allow alignment if the difference in the two unpadded line lengths + # is not more than either line length. The idea is to avoid + # aligning lines with very different field lengths, like these two: - local $" = '> <'; - 0 && print < -old jmax: $jmax_old -old tokens: <@{$rtokens_old}> -old patterns: <@{$rpatterns_old}> -old fields: <@{$rfields_old}> -EOM + # [ + # 'VARCHAR', DBI::SQL_VARCHAR, undef, "'", "'", undef, 0, 1, + # 1, 0, 0, 0, undef, 0, 0 + # ]; + my $rfield_lengths = $line->get_rfield_lengths(); + my $rfield_lengths_m = $line_m->get_rfield_lengths(); - my $rfields_new = []; - my $rpatterns_new = []; - my $rtokens_new = []; + # Safety check - shouldn't happen + return 0 + unless $imax_min < @{$rfield_lengths} && $imax_min < @{$rfield_lengths_m}; - my $kmax = @{$ridel} - 1; - my $k = 0; - my $jdel_next = $ridel->[$k]; + my $lensum_m = 0; + my $lensum = 0; + for ( my $i = 0 ; $i <= $imax_min ; $i++ ) { + $lensum_m += $rfield_lengths_m->[$i]; + $lensum += $rfield_lengths->[$i]; + } - # FIXME: - if ( $jdel_next < 0 ) { print STDERR "bad jdel_next=$jdel_next\n"; return } - my $pattern = $rpatterns_old->[0]; - my $field = $rfields_old->[0]; - push @{$rfields_new}, $field; - push @{$rpatterns_new}, $pattern; - for ( my $j = 0 ; $j < $jmax_old ; $j++ ) { - my $token = $rtokens_old->[$j]; - my $field = $rfields_old->[ $j + 1 ]; - my $pattern = $rpatterns_old->[ $j + 1 ]; - if ( $k > $kmax || $j < $jdel_next ) { - push @{$rtokens_new}, $token; - push @{$rfields_new}, $field; - push @{$rpatterns_new}, $pattern; - } - elsif ( $j == $jdel_next ) { - $rfields_new->[-1] .= $field; - $rpatterns_new->[-1] .= $pattern; - if ( ++$k <= $kmax ) { - my $jdel_last = $jdel_next; - $jdel_next = $ridel->[$k]; - if ( $jdel_next < $jdel_last ) { - - # FIXME: - print STDERR "bad jdel_next=$jdel_next\n"; - return; - } - } + my ( $lenmin, $lenmax ) = + $lensum >= $lensum_m ? ( $lensum_m, $lensum ) : ( $lensum, $lensum_m ); + + my $patterns_match; + if ( $line_m->get_list_type() && $line->get_list_type() ) { + $patterns_match = 1; + my $rpatterns_m = $line_m->get_rpatterns(); + my $rpatterns = $line->get_rpatterns(); + for ( my $i = 0 ; $i <= $imax_min ; $i++ ) { + my $pat = $rpatterns->[$i]; + my $pat_m = $rpatterns_m->[$i]; + if ( $pat ne $pat_m ) { $patterns_match = 0; last } } } - # ----- x ------ x ------ x ------ - #t 0 1 2 <- token indexing - #f 0 1 2 3 <- field and pattern + my $pad_max = $lenmax; + if ( !$patterns_match && $lenmax > 2 * $lenmin ) { $pad_max = 0 } - my $jmax_new = @{$rfields_new} - 1; - $line_obj->set_rtokens($rtokens_new); - $line_obj->set_rpatterns($rpatterns_new); - $line_obj->set_rfields($rfields_new); - $line_obj->set_jmax($jmax_new); + return $pad_max; +} - 0 && print < 0 ); + + ############################################################################ + # Step 1: Loop over groups to find all common leading alignment tokens + ############################################################################ + + my $line; + my $rtokens; + my $imax; # index of maximum non-side-comment alignment token + my $istop; # an optional stopping index + my $jbeg; # starting line index + my $jend; # ending line index + + my $line_m; + my $rtokens_m; + my $imax_m; + my $istop_m; + my $jbeg_m; + my $jend_m; + + my $istop_mm; + + # Look at neighboring pairs of groups and form a simple list + # of all common leading alignment tokens. Foreach such match we + # store [$i, $ng], where + # $i = index of the token in the line (0,1,...) + # $ng is the second of the two groups with this common token + my @icommon; + + # Hash to hold the maximum alignment change for any group + my %max_move; + + # a small number of columns + my $short_pad = 4; + + my $ng = -1; + foreach my $item ( @{$rgroups} ) { + $ng++; + + $istop_mm = $istop_m; + + # save _m values of previous group + $line_m = $line; + $rtokens_m = $rtokens; + $imax_m = $imax; + $istop_m = $istop; + $jbeg_m = $jbeg; + $jend_m = $jend; + + # Get values for this group. Note that we just have to use values for + # one of the lines of the group since all members have the same + # alignments. + ( $jbeg, $jend, $istop ) = @{$item}; + + $line = $rlines->[$jbeg]; + $rtokens = $line->get_rtokens(); + $imax = $line->get_jmax() - 2; + $istop = -1 unless ( defined($istop) ); + $istop = $imax if ( $istop > $imax ); + + # Initialize on first group + next if ( $ng == 0 ); + + # Use the minimum index limit of the two groups + my $imax_min = $imax > $imax_m ? $imax_m : $imax; + + # Also impose a limit if given. + if ( $istop_m < $imax_min ) { + $imax_min = $istop_m; + } -new jmax: $jmax_new -new tokens: <@{$rtokens_new}> -new patterns: <@{$rpatterns_new}> -new fields: <@{$rfields_new}> -EOM - return; -} + # Special treatment of two one-line groups isolated from other lines, + # unless they form a simple list or a terminal match. Otherwise the + # alignment can look strange in some cases. + my $list_type = $rlines->[$jbeg]->get_list_type(); + if ( + $jend == $jbeg + && $jend_m == $jbeg_m + && ( $ng == 1 || $istop_mm < 0 ) + && ( $ng == $ng_max || $istop < 0 ) + && !$line->get_j_terminal_match() + + # Only do this for imperfect matches. This is normally true except + # when two perfect matches cannot form a group because the line + # length limit would be exceeded. In that case we can still try + # to match as many alignments as possible. + && ( $imax != $imax_m || $istop_m != $imax_m ) + ) + { -sub decode_alignment_token { + # We will just align assignments and simple lists + next unless ( $imax_min >= 0 ); + next + unless ( $rtokens->[0] =~ /^=\d/ + || $list_type ); + + # In this case we will limit padding to a short distance. This + # is a compromise to keep some vertical alignment but prevent large + # gaps, which do not look good for just two lines. + my $pad_max = + two_line_pad( $rlines->[$jbeg], $rlines->[$jbeg_m], $imax_min ); + next unless ($pad_max); + my $ng_m = $ng - 1; + $max_move{"$ng_m"} = $pad_max; + $max_move{"$ng"} = $pad_max; + } - # Unpack the values packed in an alignment token - # - # Usage: - # my ( $raw_tok, $lev, $tag, $tok_count ) = - # decode_alignment_token($token); - - # Alignment tokens have a trailing decimal level and optional tag (for - # commas): - # For example, the first comma in the following line - # sub banner { crlf; report( shift, '/', shift ); crlf } - # is decorated as follows: - # ,2+report-6 => (tok,lev,tag) =qw( , 2 +report-6) - - # An optional token count may be appended with a leading dot. - # Currently this is only done for '=' tokens but this could change. - # For example, consider the following line: - # $nport = $port = shift || $name; - # The first '=' may either be '=0' or '=0.1' [level 0, first equals] - # The second '=' will be '=0.2' [level 0, second equals] - my ($tok) = @_; - my ( $raw_tok, $lev, $tag, $tok_count ) = ( $tok, 0, "", 1 ); - if ( $tok =~ /^(\D+)(\d+)([^\.]*)(\.(\d+))?$/ ) { - $raw_tok = $1; - $lev = $2; - $tag = $3 if ($3); - $tok_count = $5 if ($5); - } - return ( $raw_tok, $lev, $tag, $tok_count ); + # Loop to find all common leading tokens. + if ( $imax_min >= 0 ) { + foreach my $i ( 0 .. $imax_min ) { + my $tok = $rtokens->[$i]; + my $tok_m = $rtokens_m->[$i]; + last if ( $tok ne $tok_m ); + push @icommon, [ $i, $ng, $tok ]; + } + } + } + return unless @icommon; + + ########################################################### + # Step 2: Reorder and consolidate the list into a task list + ########################################################### + + # We have to work first from lowest token index to highest, then by group, + # sort our list first on token index then group number + @icommon = sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @icommon; + + # Make a task list of the form + # [$i, ng_beg, $ng_end, $tok], .. + # where + # $i is the index of the token to be aligned + # $ng_beg..$ng_end is the group range for this action + my @todo; + my ( $i, $ng_end, $tok ); + foreach my $item (@icommon) { + my $ng_last = $ng_end; + my $i_last = $i; + ( $i, $ng_end, $tok ) = @{$item}; + my $ng_beg = $ng_end - 1; + if ( defined($ng_last) && $ng_beg == $ng_last && $i == $i_last ) { + my $var = pop(@todo); + $ng_beg = $var->[1]; + } + my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok); + push @todo, [ $i, $ng_beg, $ng_end, $raw_tok, $lev ]; + } + + ############################### + # Step 3: Execute the task list + ############################### + do_left_to_right_sweep( $rlines, $rgroups, \@todo, \%max_move, $short_pad, + $group_level ); + return; } -{ # sub is_deletable_token +{ ## closure for sub do_left_to_right_sweep - my %is_deletable_equals; + my %is_good_alignment_token; BEGIN { - my @q; - # These tokens with = may be deleted for vertical aligmnemt - @q = qw( - <= >= == =~ != <=> - ); - @is_deletable_equals{@q} = (1) x scalar(@q); + # One of the most difficult aspects of vertical alignment is knowing + # when not to align. Alignment can go from looking very nice to very + # bad when overdone. In the sweep algorithm there are two special + # cases where we may need to limit padding to a '$short_pad' distance + # to avoid some very ugly formatting: - } + # 1. Two isolated lines with partial alignment + # 2. A 'tail-wag-dog' situation, in which a single terminal + # line with partial alignment could cause a significant pad + # increase in many previous lines if allowed to join the alignment. - sub is_deletable_token { + # For most alignment tokens, we will allow only a small pad to be + # introduced (the hardwired $short_pad variable) . But for some 'good' + # alignments we can be less restrictive. - # Determine if a token with no match possibility can be removed to - # improve chances of making an alignment. - my ( $token, $i, $imax, $jline, $i_eq ) = @_; + # These are 'good' alignments, which are allowed more padding: + my @q = qw( + => = ? if unless or || { + ); + push @q, ','; + @is_good_alignment_token{@q} = (0) x scalar(@q); - my ( $raw_tok, $lev, $tag, $tok_count ) = - decode_alignment_token($token); + # Promote a few of these to 'best', with essentially no pad limit: + $is_good_alignment_token{'='} = 1; + $is_good_alignment_token{'if'} = 1; + $is_good_alignment_token{'unless'} = 1; + $is_good_alignment_token{'=>'} = 1 - # okay to delete second and higher copies of a token - if ( $tok_count > 1 ) { return 1 } + # Note the hash values are set so that: + # if ($is_good_alignment_token{$raw_tok}) => best + # if defined ($is_good_alignment_token{$raw_tok}) => good or best - # only remove lower level commas - if ( $raw_tok eq ',' ) { + } - return if ( defined($i_eq) && $i < $i_eq ); - return if ( $lev <= $group_level ); - } + sub do_left_to_right_sweep { + my ( $rlines, $rgroups, $rtodo, $rmax_move, $short_pad, $group_level ) + = @_; + + # $blocking_level[$nj is the level at a match failure between groups + # $ng-1 and $ng + my @blocking_level; + my $group_list_type = $rlines->[0]->get_list_type(); + + my $move_to_common_column = sub { + + # Move the alignment column of token $itok to $col_want for a + # sequence of groups. + my ( $ngb, $nge, $itok, $col_want, $raw_tok ) = @_; + return unless ( defined($ngb) && $nge > $ngb ); + foreach my $ng ( $ngb .. $nge ) { + + my ( $jbeg, $jend ) = @{ $rgroups->[$ng] }; + my $line = $rlines->[$jbeg]; + my $col = $line->get_column($itok); + my $avail = $line->get_available_space_on_right(); + my $move = $col_want - $col; + if ( $move > 0 ) { + + # limit padding increase in isolated two lines + next + if ( defined( $rmax_move->{$ng} ) + && $move > $rmax_move->{$ng} + && !$is_good_alignment_token{$raw_tok} ); + + $line->increase_field_width( $itok, $move ); + } + elsif ( $move < 0 ) { - # most operators with an equals sign should be retained if at - # same level as this statement - elsif ( $raw_tok =~ /=/ ) { - return - unless ( $lev > $group_level || $is_deletable_equals{$raw_tok} ); - } + # spot to take special action on failure to move + } + } + }; + + foreach my $task ( @{$rtodo} ) { + my ( $itok, $ng_beg, $ng_end, $raw_tok, $lev ) = @{$task}; + + # Nothing to do for a single group + next unless ( $ng_end > $ng_beg ); + + my $ng_first; # index of the first group of a continuous sequence + my $col_want; # the common alignment column of a sequence of groups + my $col_limit; # maximum column before bumping into max line length + my $line_count_ng_m = 0; + my $jmax_m; + my $it_stop_m; + + # Loop over the groups + # 'ix_' = index in the array of lines + # 'ng_' = index in the array of groups + # 'it_' = index in the array of tokens + my $ix_min = $rgroups->[$ng_beg]->[0]; + my $ix_max = $rgroups->[$ng_end]->[1]; + my $lines_total = $ix_max - $ix_min + 1; + foreach my $ng ( $ng_beg .. $ng_end ) { + my ( $ix_beg, $ix_end, $it_stop ) = @{ $rgroups->[$ng] }; + my $line_count_ng = $ix_end - $ix_beg + 1; + + # Important: note that since all lines in a group have a common + # alignments object, we just have to work on one of the lines + # (the first line). All of the rest will be changed + # automatically. + my $line = $rlines->[$ix_beg]; + my $jmax = $line->get_jmax(); - # otherwise, ok to delete the token - return 1; - } -} + # the maximum space without exceeding the line length: + my $avail = $line->get_available_space_on_right(); + my $col = $line->get_column($itok); + my $col_max = $col + $avail; + + # Initialize on first group + if ( !defined($col_want) ) { + $ng_first = $ng; + $col_want = $col; + $col_limit = $col_max; + $line_count_ng_m = $line_count_ng; + $jmax_m = $jmax; + $it_stop_m = $it_stop; + next; + } -sub delete_unmatched_tokens { - my ($rlines) = @_; + # RULE: Throw a blocking flag upon encountering a token level + # different from the level of the first blocking token. For + # example, in the following example, if the = matches get + # blocked between two groups as shown, then we want to start + # blocking matches at the commas, which are at deeper level, so + # that we do not get the big gaps shown here: + + # my $unknown3 = pack( "v", -2 ); + # my $unknown4 = pack( "v", 0x09 ); + # my $unknown5 = pack( "VVV", 0x06, 0x00, 0x00 ); + # my $num_bbd_blocks = pack( "V", $num_lists ); + # my $root_startblock = pack( "V", $root_start ); + # my $unknown6 = pack( "VV", 0x00, 0x1000 ); + + # On the other hand, it is okay to keep matching at the same + # level such as in a simple list of commas and/or fat arrors. + + my $is_blocked = defined( $blocking_level[$ng] ) + && $lev > $blocking_level[$ng]; + + # TAIL-WAG-DOG RULE: prevent a 'tail-wag-dog' syndrom, meaning: + # Do not let one or two lines with a **different number of + # alignments** open up a big gap in a large block. For + # example, we will prevent something like this, where the first + # line prys open the rest: + + # $worksheet->write( "B7", "http://www.perl.com", undef, $format ); + # $worksheet->write( "C7", "", $format ); + # $worksheet->write( "D7", "", $format ); + # $worksheet->write( "D8", "", $format ); + # $worksheet->write( "D8", "", $format ); + + # We should exclude from consideration two groups which are + # effectively the same but separated because one does not + # fit in the maximum allowed line length. + my $is_same_group = + $jmax == $jmax_m && $it_stop_m == $jmax_m - 2; + + my $lines_above = $ix_beg - $ix_min; + my $lines_below = $lines_total - $lines_above; + + # Increase the tolerable gap for certain favorable factors + my $factor = 1; + my $top_level = $lev == $group_level; + + # Align best top level alignment tokens like '=', 'if', ... + # A factor of 10 allows a gap of up to 40 spaces + if ( $top_level && $is_good_alignment_token{$raw_tok} ) { + $factor = 10; + } - # This is a preliminary step in vertical alignment in which we remove as - # many obviously un-needed alignment tokens as possible. This will prevent - # them from interfering with the final alignment. + # Otherwise allow some minimal padding of good alignments + elsif ( - return unless @{$rlines}; - my $has_terminal_match = $rlines->[-1]->get_j_terminal_match(); + defined( $is_good_alignment_token{$raw_tok} ) - # ignore hanging side comments in these operations - my @filtered = grep { !$_->{_is_hanging_side_comment} } @{$rlines}; - my $rnew_lines = \@filtered; - my @i_equals; - my @min_levels; + # We have to be careful if there are just 2 lines. This + # two-line factor allows large gaps only for 2 lines which + # are simple lists with fewer items on the second line. It + # gives results similar to previous versions of perltidy. + && ( $lines_total > 2 + || $group_list_type && $jmax < $jmax_m && $top_level ) + ) + { + $factor += 1; + if ($top_level) { + $factor += 1; + } + } - my $jmax = @{$rnew_lines} - 1; + my $is_big_gap; + if ( !$is_same_group ) { + $is_big_gap ||= + ( $lines_above == 1 + || $lines_above == 2 && $lines_below >= 4 ) + && $col_want > $col + $short_pad * $factor; + $is_big_gap ||= + ( $lines_below == 1 + || $lines_below == 2 && $lines_above >= 4 ) + && $col > $col_want + $short_pad * $factor; + } - my %is_good_tok; + # if match is limited by gap size, stop aligning at this level + if ($is_big_gap) { + $blocking_level[$ng] = $lev - 1; + } - # create a hash of tokens for each line - my $rline_hashes = []; - foreach my $line ( @{$rnew_lines} ) { - my $rhash = {}; - my $rtokens = $line->get_rtokens(); - my $i = 0; - my $i_eq; - my $lev_min; - foreach my $tok ( @{$rtokens} ) { - my ( $raw_tok, $lev, $tag, $tok_count ) = - decode_alignment_token($tok); - if ( !defined($lev_min) || $lev < $lev_min ) { $lev_min = $lev } + # quit and restart if it cannot join this batch + if ( $col_want > $col_max + || $col > $col_limit + || $is_big_gap + || $is_blocked ) + { - # Possible future upgrade: for multiple matches, - # record [$i1, $i2, ..] instead of $i - $rhash->{$tok} = - [ $i, undef, undef, $raw_tok, $lev, $tag, $tok_count ]; + # remember the level of the first blocking token + if ( !defined( $blocking_level[$ng] ) ) { + $blocking_level[$ng] = $lev; + } - # remember the first equals at line level - if ( !defined($i_eq) && $raw_tok eq '=' ) { - if ( $lev eq $group_level ) { $i_eq = $i } - } - $i++; - } - push @{$rline_hashes}, $rhash; - push @i_equals, $i_eq; - push @min_levels, $lev_min; - } - - # compare each line pair and record matches - my $rtok_hash = {}; - my $nr = 0; - for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) { - my $nl = $nr; - $nr = 0; - my $jr = $jl + 1; - my $rhash_l = $rline_hashes->[$jl]; - my $rhash_r = $rline_hashes->[$jr]; - my $count = 0; # UNUSED NOW? - my $ntoks = 0; - foreach my $tok ( keys %{$rhash_l} ) { - $ntoks++; - if ( defined( $rhash_r->{$tok} ) ) { - if ( $tok ne '#' ) { $count++; } - my $il = $rhash_l->{$tok}->[0]; - my $ir = $rhash_r->{$tok}->[0]; - $rhash_l->{$tok}->[2] = $ir; - $rhash_r->{$tok}->[1] = $il; - if ( $tok ne '#' ) { - push @{ $rtok_hash->{$tok} }, ( $jl, $jr ); - $nr++; + $move_to_common_column->( + $ng_first, $ng - 1, $itok, $col_want, $raw_tok + ); + $ng_first = $ng; + $col_want = $col; + $col_limit = $col_max; + $line_count_ng_m = $line_count_ng; + $jmax_m = $jmax; + $it_stop_m = $it_stop; + next; } - } - } - # Set a line break if no matching tokens between these lines - if ( $nr == 0 && $nl > 0 ) { - $rnew_lines->[$jl]->{_end_group} = 1; - } - } + $line_count_ng_m += $line_count_ng; - # find subgroups - my @subgroups; - push @subgroups, [ 0, $jmax ]; - for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) { - if ( $rnew_lines->[$jl]->{_end_group} ) { - $subgroups[-1]->[1] = $jl; - push @subgroups, [ $jl + 1, $jmax ]; - } + # update the common column and limit + if ( $col > $col_want ) { $col_want = $col } + if ( $col_max < $col_limit ) { $col_limit = $col_max } + + } ## end loop over groups + + if ( $ng_end > $ng_first ) { + $move_to_common_column->( + $ng_first, $ng_end, $itok, $col_want, $raw_tok + ); + } ## end loop over groups for one task + } ## end loop over tasks + + return; } +} - # Loop to process each subgroups - foreach my $item (@subgroups) { - my ( $jbeg, $jend ) = @{$item}; +sub delete_selected_tokens { - # look for complete ternary or if/elsif/else blocks - my $nlines = $jend - $jbeg + 1; - my %token_line_count; - for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) { - my %seen; - my $line = $rnew_lines->[$jj]; - my $rtokens = $line->get_rtokens(); + my ( $line_obj, $ridel ) = @_; + + # $line_obj is the line to be modified + # $ridel is a ref to list of indexes to be deleted + + # remove an unused alignment token(s) to improve alignment chances + + return unless ( defined($line_obj) && defined($ridel) && @{$ridel} ); + + my $jmax_old = $line_obj->get_jmax(); + my $rfields_old = $line_obj->get_rfields(); + my $rfield_lengths_old = $line_obj->get_rfield_lengths(); + my $rpatterns_old = $line_obj->get_rpatterns(); + my $rtokens_old = $line_obj->get_rtokens(); + my $j_terminal_match = $line_obj->get_j_terminal_match(); + + use constant EXPLAIN_DELETE_SELECTED => 0; + + local $" = '> <'; + EXPLAIN_DELETE_SELECTED && print < +old jmax: $jmax_old +old tokens: <@{$rtokens_old}> +old patterns: <@{$rpatterns_old}> +old fields: <@{$rfields_old}> +old field_lengths: <@{$rfield_lengths_old}> +EOM + + my $rfields_new = []; + my $rpatterns_new = []; + my $rtokens_new = []; + my $rfield_lengths_new = []; + + # Convert deletion list to a hash to allow any order, multiple entries, + # and avoid problems with index values out of range + my %delete_me; + @delete_me{ @{$ridel} } = (1) x scalar( @{$ridel} ); + + my $pattern = $rpatterns_old->[0]; + my $field = $rfields_old->[0]; + my $field_length = $rfield_lengths_old->[0]; + push @{$rfields_new}, $field; + push @{$rfield_lengths_new}, $field_length; + push @{$rpatterns_new}, $pattern; + + # Loop to either copy items or concatenate fields and patterns + my $jmin_del; + for ( my $j = 0 ; $j < $jmax_old ; $j++ ) { + my $token = $rtokens_old->[$j]; + my $field = $rfields_old->[ $j + 1 ]; + my $field_length = $rfield_lengths_old->[ $j + 1 ]; + my $pattern = $rpatterns_old->[ $j + 1 ]; + if ( !$delete_me{$j} ) { + push @{$rtokens_new}, $token; + push @{$rfields_new}, $field; + push @{$rpatterns_new}, $pattern; + push @{$rfield_lengths_new}, $field_length; + } + else { + if ( !defined($jmin_del) ) { $jmin_del = $j } + $rfields_new->[-1] .= $field; + $rfield_lengths_new->[-1] += $field_length; + $rpatterns_new->[-1] .= $pattern; + } + } + + # ----- x ------ x ------ x ------ + #t 0 1 2 <- token indexing + #f 0 1 2 3 <- field and pattern + + my $jmax_new = @{$rfields_new} - 1; + $line_obj->set_rtokens($rtokens_new); + $line_obj->set_rpatterns($rpatterns_new); + $line_obj->set_rfields($rfields_new); + $line_obj->set_rfield_lengths($rfield_lengths_new); + $line_obj->set_jmax($jmax_new); + + # The value of j_terminal_match will be incorrect if we delete tokens prior + # to it. We will have to give up on aligning the terminal tokens if this + # happens. + if ( defined($j_terminal_match) && $jmin_del <= $j_terminal_match ) { + $line_obj->set_j_terminal_match(undef); + } + + # update list type - + if ( $line_obj->get_list_seqno() ) { + + ## This works, but for efficiency see if we need to make a change: + ## decide_if_list($line_obj); + + # An existing list will still be a list but with possibly different + # leading token + my $old_list_type = $line_obj->get_list_type(); + my $new_list_type = ""; + if ( $rtokens_new->[0] =~ /^(=>|,)/ ) { + $new_list_type = $rtokens_new->[0]; + } + if ( !$old_list_type || $old_list_type ne $new_list_type ) { + decide_if_list($line_obj); + } + } + + EXPLAIN_DELETE_SELECTED && print < +new patterns: <@{$rpatterns_new}> +new fields: <@{$rfields_new}> +EOM + return; +} + +{ ## closure for sub decode_alignment_token + + # This routine is called repeatedly for each token, so it needs to be + # efficient. We can speed things up by remembering the inputs and outputs + # in a hash. + my %decoded_token; + + sub initialize_decode { + + # We will re-initialize the hash for each file. Otherwise, there is + # a danger that the hash can become arbitrarily large if a very large + # number of files is processed at once. + %decoded_token = (); + return; + } + + sub decode_alignment_token { + + # Unpack the values packed in an alignment token + # + # Usage: + # my ( $raw_tok, $lev, $tag, $tok_count ) = + # decode_alignment_token($token); + + # Alignment tokens have a trailing decimal level and optional tag (for + # commas): + # For example, the first comma in the following line + # sub banner { crlf; report( shift, '/', shift ); crlf } + # is decorated as follows: + # ,2+report-6 => (tok,lev,tag) =qw( , 2 +report-6) + + # An optional token count may be appended with a leading dot. + # Currently this is only done for '=' tokens but this could change. + # For example, consider the following line: + # $nport = $port = shift || $name; + # The first '=' may either be '=0' or '=0.1' [level 0, first equals] + # The second '=' will be '=0.2' [level 0, second equals] + my ($tok) = @_; + + if ( defined( $decoded_token{$tok} ) ) { + return @{ $decoded_token{$tok} }; + } + + my ( $raw_tok, $lev, $tag, $tok_count ) = ( $tok, 0, "", 1 ); + if ( $tok =~ /^(\D+)(\d+)([^\.]*)(\.(\d+))?$/ ) { + $raw_tok = $1; + $lev = $2; + $tag = $3 if ($3); + $tok_count = $5 if ($5); + } + my @vals = ( $raw_tok, $lev, $tag, $tok_count ); + $decoded_token{$tok} = \@vals; + return @vals; + } +} + +{ ## closure for sub delete_unmatched_tokens + + my %is_assignment; + my %keep_after_deleted_assignment; + + BEGIN { + my @q; + + @q = qw( + = **= += *= &= <<= &&= + -= /= |= >>= ||= //= + .= %= ^= + x= + ); + @is_assignment{@q} = (1) x scalar(@q); + + # These tokens may be kept following an = deletion + @q = qw( + if unless or || + ); + @keep_after_deleted_assignment{@q} = (1) x scalar(@q); + + } + + # This flag is for testing only and should normally be zero. + use constant TEST_DELETE_NULL => 0; + + sub delete_unmatched_tokens { + my ( $rlines, $group_level ) = @_; + + # This is a preliminary step in vertical alignment in which we remove + # as many obviously un-needed alignment tokens as possible. This will + # prevent them from interfering with the final alignment. + + # These are the return values + my $max_lev_diff = 0; # used to avoid a call to prune_tree + my $saw_side_comment = 0; # used to avoid a call for side comments + + # Handle no lines -- shouldn't happen + return unless @{$rlines}; + + # Handle a single line + if ( @{$rlines} == 1 ) { + my $line = $rlines->[0]; + my $jmax = $line->get_jmax(); + my $length = $line->get_rfield_lengths()->[$jmax]; + $saw_side_comment = $length > 0; + return ( $max_lev_diff, $saw_side_comment ); + } + + my $has_terminal_match = $rlines->[-1]->get_j_terminal_match(); + + # ignore hanging side comments in these operations + my @filtered = grep { !$_->get_is_hanging_side_comment() } @{$rlines}; + my $rnew_lines = \@filtered; + + $saw_side_comment = @filtered != @{$rlines}; + $max_lev_diff = 0; + + # nothing to do if all lines were hanging side comments + my $jmax = @{$rnew_lines} - 1; + return ( $max_lev_diff, $saw_side_comment ) unless ( $jmax >= 0 ); + + my @equals_info; + my @line_info; + my %is_good_tok; + + # create a hash of tokens for each line + my $rline_hashes = []; + foreach my $line ( @{$rnew_lines} ) { + my $rhash = {}; + my $rtokens = $line->get_rtokens(); + my $rpatterns = $line->get_rpatterns(); + my $i = 0; + my ( $i_eq, $tok_eq, $pat_eq ); + my ( $lev_min, $lev_max ); foreach my $tok ( @{$rtokens} ) { - if ( !$seen{$tok} ) { - $seen{$tok}++; - $token_line_count{$tok}++; + my ( $raw_tok, $lev, $tag, $tok_count ) = + decode_alignment_token($tok); + + if ( $tok ne '#' ) { + if ( !defined($lev_min) ) { + $lev_min = $lev; + $lev_max = $lev; + } + else { + if ( $lev < $lev_min ) { $lev_min = $lev } + if ( $lev > $lev_max ) { $lev_max = $lev } + } + } + else { + if ( !$saw_side_comment ) { + my $length = $line->get_rfield_lengths()->[ $i + 1 ]; + $saw_side_comment ||= $length; + } } + + # Possible future upgrade: for multiple matches, + # record [$i1, $i2, ..] instead of $i + $rhash->{$tok} = + [ $i, undef, undef, $raw_tok, $lev, $tag, $tok_count ]; + + # remember the first equals at line level + if ( !defined($i_eq) && $raw_tok eq '=' ) { + + if ( $lev eq $group_level ) { + $i_eq = $i; + $tok_eq = $tok; + $pat_eq = $rpatterns->[$i]; + } + } + $i++; + } + push @{$rline_hashes}, $rhash; + push @equals_info, [ $i_eq, $tok_eq, $pat_eq ]; + push @line_info, [ $lev_min, $lev_max ]; + if ( defined($lev_min) ) { + my $lev_diff = $lev_max - $lev_min; + if ( $lev_diff > $max_lev_diff ) { $max_lev_diff = $lev_diff } } } - # Look for if/else/elsif and ternary blocks - my $is_full_block; - foreach my $tok ( keys %token_line_count ) { - if ( $token_line_count{$tok} == $nlines ) { - if ( $tok =~ /^\?/ || $tok =~ /^\{\d+if/ ) { - $is_full_block = 1; + # compare each line pair and record matches + my $rtok_hash = {}; + my $nr = 0; + for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) { + my $nl = $nr; + $nr = 0; + my $jr = $jl + 1; + my $rhash_l = $rline_hashes->[$jl]; + my $rhash_r = $rline_hashes->[$jr]; + my $count = 0; # UNUSED NOW? + my $ntoks = 0; + foreach my $tok ( keys %{$rhash_l} ) { + $ntoks++; + if ( defined( $rhash_r->{$tok} ) ) { + if ( $tok ne '#' ) { $count++; } + my $il = $rhash_l->{$tok}->[0]; + my $ir = $rhash_r->{$tok}->[0]; + $rhash_l->{$tok}->[2] = $ir; + $rhash_r->{$tok}->[1] = $il; + if ( $tok ne '#' ) { + push @{ $rtok_hash->{$tok} }, ( $jl, $jr ); + $nr++; + } + } + } + + # Set a line break if no matching tokens between these lines + # (this is not strictly necessary now but does not hurt) + if ( $nr == 0 && $nl > 0 ) { + $rnew_lines->[$jl]->set_end_group(1); + } + + # Also set a line break if both lines have simple equals but with + # different leading characters in patterns. This check is similar + # to one in sub check_match, and will prevent sub + # prune_alignment_tree from removing alignments which otherwise + # should be kept. This fix is rarely needed, but it can + # occasionally improve formatting. + # For example: + # my $name = $this->{Name}; + # $type = $this->ctype($genlooptype) if defined $genlooptype; + # my $declini = ( $asgnonly ? "" : "\t$type *" ); + # my $cast = ( $type ? "($type *)" : "" ); + # The last two lines start with 'my' and will not match the + # previous line starting with $type, so we do not want + # prune_alignment tree to delete their ? : alignments at a deeper + # level. + my ( $i_eq_l, $tok_eq_l, $pat_eq_l ) = @{ $equals_info[$jl] }; + my ( $i_eq_r, $tok_eq_r, $pat_eq_r ) = @{ $equals_info[$jr] }; + if ( defined($i_eq_l) && defined($i_eq_r) ) { + + # Also, do not align equals across a change in ci level + my $ci_jump = $rnew_lines->[$jl]->get_ci_level() != + $rnew_lines->[$jr]->get_ci_level(); + + if ( + $tok_eq_l eq $tok_eq_r + && $i_eq_l == 0 + && $i_eq_r == 0 + && ( substr( $pat_eq_l, 0, 1 ) ne substr( $pat_eq_r, 0, 1 ) + || $ci_jump ) + ) + { + $rnew_lines->[$jl]->set_end_group(1); } } } - # remove unwanted alignment tokens - for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) { - my $line = $rnew_lines->[$jj]; - my $rtokens = $line->get_rtokens(); - my $rhash = $rline_hashes->[$jj]; - my $i = 0; - my $i_eq = $i_equals[$jj]; - my @idel; - my $imax = @{$rtokens} - 2; - my $delete_above_level; + # find subgroups + my @subgroups; + push @subgroups, [ 0, $jmax ]; + for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) { + if ( $rnew_lines->[$jl]->get_end_group() ) { + $subgroups[-1]->[1] = $jl; + push @subgroups, [ $jl + 1, $jmax ]; + } + } - for ( my $i = 0 ; $i <= $imax ; $i++ ) { - my $tok = $rtokens->[$i]; - next if ( $tok eq '#' ); # shouldn't happen - my ( $iii, $il, $ir, $raw_tok, $lev, $tag, $tok_count ) = - @{ $rhash->{$tok} }; - - # always remove unmatched tokens - my $delete_me = !defined($il) && !defined($ir); - - # also, if this is a complete ternary or if/elsif/else block, - # remove all alignments which are not also in every line - $delete_me ||= - ( $is_full_block && $token_line_count{$tok} < $nlines ); - - # Remove all tokens above a certain level following a previous - # deletion. For example, we have to remove tagged higher level - # alignment tokens following a => deletion because the tags of - # higher level tokens will now be incorrect. For example, this - # will prevent aligning commas as follows after deleting the - # second => - # $w->insert( - # ListBox => origin => [ 270, 160 ], - # size => [ 200, 55 ], - # ); - if ( defined($delete_above_level) ) { - if ( $lev > $delete_above_level ) { - $delete_me ||= 1; #$tag; + # flag to allow skipping pass 2 + my $saw_large_group; + + ############################################################ + # PASS 1 over subgroups to remove unmatched alignment tokens + ############################################################ + foreach my $item (@subgroups) { + my ( $jbeg, $jend ) = @{$item}; + + my $nlines = $jend - $jbeg + 1; + + #################################################### + # Look for complete if/elsif/else and ternary blocks + #################################################### + + # We are looking for a common '$dividing_token' like these: + + # if ( $b and $s ) { $p->{'type'} = 'a'; } + # elsif ($b) { $p->{'type'} = 'b'; } + # elsif ($s) { $p->{'type'} = 's'; } + # else { $p->{'type'} = ''; } + # ^----------- dividing_token + + # my $severity = + # !$routine ? '[PFX]' + # : $routine =~ /warn.*_d\z/ ? '[DS]' + # : $routine =~ /ck_warn/ ? 'W' + # : $routine =~ /ckWARN\d*reg_d/ ? 'S' + # : $routine =~ /ckWARN\d*reg/ ? 'W' + # : $routine =~ /vWARN\d/ ? '[WDS]' + # : '[PFX]'; + # ^----------- dividing_token + + # Only look for groups which are more than 2 lines long. Two lines + # can get messed up doing this, probably due to the various + # two-line rules. + + my $dividing_token; + my %token_line_count; + if ( $nlines > 2 ) { + + for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) { + my %seen; + my $line = $rnew_lines->[$jj]; + my $rtokens = $line->get_rtokens(); + foreach my $tok ( @{$rtokens} ) { + if ( !$seen{$tok} ) { + $seen{$tok}++; + $token_line_count{$tok}++; + } } - else { $delete_above_level = undef } } - if ( - $delete_me - && is_deletable_token( $tok, $i, $imax, $jj, $i_eq ) + foreach my $tok ( keys %token_line_count ) { + if ( $token_line_count{$tok} == $nlines ) { + if ( substr( $tok, 0, 1 ) eq '?' + || substr( $tok, 0, 1 ) eq '{' + && $tok =~ /^\{\d+if/ ) + { + $dividing_token = $tok; + last; + } + } + } + } + + ##################################################### + # Loop over lines to remove unwanted alignment tokens + ##################################################### + for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) { + my $line = $rnew_lines->[$jj]; + my $rtokens = $line->get_rtokens(); + my $rhash = $rline_hashes->[$jj]; + my $i_eq = $equals_info[$jj]->[0]; + my @idel; + my $imax = @{$rtokens} - 2; + my $delete_above_level; + my $deleted_assignment_token; + + my $saw_dividing_token = ""; + $saw_large_group ||= $nlines > 2 && $imax > 1; + + # Loop over all alignment tokens + for ( my $i = 0 ; $i <= $imax ; $i++ ) { + my $tok = $rtokens->[$i]; + next if ( $tok eq '#' ); # shouldn't happen + my ( $iii, $il, $ir, $raw_tok, $lev, $tag, $tok_count ) = + @{ $rhash->{$tok} }; + + ####################################################### + # Here is the basic RULE: remove an unmatched alignment + # which does not occur in the surrounding lines. + ####################################################### + my $delete_me = !defined($il) && !defined($ir); + + # But now we modify this with exceptions... + + # EXCEPTION 1: If we are in a complete ternary or + # if/elsif/else group, and this token is not on every line + # of the group, should we delete it to preserve overall + # alignment? + if ($dividing_token) { + if ( $token_line_count{$tok} >= $nlines ) { + $saw_dividing_token ||= $tok eq $dividing_token; + } + else { + + # For shorter runs, delete toks to save alignment. + # For longer runs, keep toks after the '{' or '?' + # to allow sub-alignments within braces. The + # number 5 lines is arbitrary but seems to work ok. + $delete_me ||= + ( $nlines < 5 || !$saw_dividing_token ); + } + } + + # EXCEPTION 2: Remove all tokens above a certain level + # following a previous deletion. For example, we have to + # remove tagged higher level alignment tokens following a + # '=>' deletion because the tags of higher level tokens + # will now be incorrect. For example, this will prevent + # aligning commas as follows after deleting the second '=>' + # $w->insert( + # ListBox => origin => [ 270, 160 ], + # size => [ 200, 55 ], + # ); + if ( defined($delete_above_level) ) { + if ( $lev > $delete_above_level ) { + $delete_me ||= 1; #$tag; + } + else { $delete_above_level = undef } + } + + # EXCEPTION 3: Remove all but certain tokens after an + # assignment deletion. + if ( + $deleted_assignment_token + && ( $lev > $group_level + || !$keep_after_deleted_assignment{$raw_tok} ) + ) + { + $delete_me ||= 1; + } - # Patch: do not touch the first line of a terminal match, - # such as below, because j_terminal has already been set. + # EXCEPTION 4: Do not touch the first line of a 2 line + # terminal match, such as below, because j_terminal has + # already been set. # if ($tag) { $tago = "<$tag>"; $tagc = ""; } # else { $tago = $tagc = ''; } # But see snippets 'else1.t' and 'else2.t' - && !( $jj == $jbeg && $has_terminal_match && $nlines == 2 ) + $delete_me = 0 + if ( $jj == $jbeg + && $has_terminal_match + && $nlines == 2 ); - ) - { - push @idel, $i; - if ( !defined($delete_above_level) - || $lev < $delete_above_level ) - { + # EXCEPTION 5: misc additional rules for commas and equals + if ($delete_me) { + + # okay to delete second and higher copies of a token + if ( $tok_count == 1 ) { + + # for a comma... + if ( $raw_tok eq ',' ) { + + # Do not delete commas before an equals + $delete_me = 0 + if ( defined($i_eq) && $i < $i_eq ); + + # Do not delete line-level commas + $delete_me = 0 if ( $lev <= $group_level ); + } + + # For an assignment at group level.. + if ( $is_assignment{$raw_tok} + && $lev == $group_level ) + { + + # Do not delete if it is the last alignment of + # multiple tokens; this will prevent some + # undesirable alignments + if ( $imax > 0 && $i == $imax ) { + $delete_me = 0; + } + + # Otherwise, set a flag to delete most + # remaining tokens + else { $deleted_assignment_token = $raw_tok } + } + } + } + + ##################################### + # Add this token to the deletion list + ##################################### + if ($delete_me) { + push @idel, $i; + + # update deletion propagation flags + if ( !defined($delete_above_level) + || $lev < $delete_above_level ) + { - # delete all following higher level alignments - $delete_above_level = $lev; + # delete all following higher level alignments + $delete_above_level = $lev; - # but keep deleting after => to next lower level - # to avoid some bizarre alignments - if ( $raw_tok eq '=>' ) { - $delete_above_level = $lev - 1; + # but keep deleting after => to next lower level + # to avoid some bizarre alignments + if ( $raw_tok eq '=>' ) { + $delete_above_level = $lev - 1; + } } } + } # End loop over alignment tokens + + # Process all deletion requests for this line + if (@idel) { + delete_selected_tokens( $line, \@idel ); } - } + } # End loopover lines + } # End loop over subgroups + + ################################################# + # PASS 2 over subgroups to remove null alignments + ################################################# + + # This pass is only used for testing. It is helping to identify + # alignment situations which might be improved with a future more + # general algorithm which adds a tail matching capability. + if (TEST_DELETE_NULL) { + delete_null_alignments( $rnew_lines, $rline_hashes, \@subgroups ) + if ($saw_large_group); + } + + # PASS 3: Construct a tree of matched lines and delete some small deeper + # levels of tokens. They also block good alignments. + prune_alignment_tree($rnew_lines) if ($max_lev_diff); + + # PASS 4: compare all lines for common tokens + match_line_pairs( $rlines, $rnew_lines, \@subgroups, $group_level ); + + return ( $max_lev_diff, $saw_side_comment ); + } +} + +sub delete_null_alignments { + my ( $rnew_lines, $rline_hashes, $rsubgroups ) = @_; + + # This is an optional second pass for deleting alignment tokens which can + # occasionally improve alignment. We look for and remove 'null + # alignments', which are alignments that require no padding. So we can + # 'cheat' and delete them. For example, notice the '=~' alignment in the + # first two lines of the following code: + + # $sysname .= 'del' if $self->label =~ /deletion/; + # $sysname .= 'ins' if $self->label =~ /insertion/; + # $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq; + + # These '=~' tokens are already aligned because they are both the same + # distance from the previous alignment token, the 'if'. So we can + # eliminate them as alignments. The advantage is that in some cases, such + # as this one, this will allow other tokens to be aligned. In this case we + # then get the 'if' tokens to align: + + # $sysname .= 'del' if $self->label =~ /deletion/; + # $sysname .= 'ins' if $self->label =~ /insertion/; + # $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq; + + # The following rules for limiting this operation have been found to + # work well and avoid problems: + + # Rule 1. We only consider a sequence of lines which have the same + # sequence of alignment tokens. + + # Rule 2. We never eliminate the first alignment token. One reason is that + # lines may have different leading indentation spaces, so keeping the + # first alignment token insures that our length measurements start at + # a well-defined point. Another reason is that nothing is gained because + # the left-to-right sweep can always handle alignment of this token. + + # Rule 3. We require that the first alignment token exist in either + # a previous line or a subsequent line. The reason is that this avoids + # changing two-line matches which go through special logic. + + # Rule 4. Do not delete a token which occurs in a previous or subsequent + # line. For example, in the above example, it was ok to eliminate the '=~' + # token from two lines because it did not occur in a surrounding line. + # If it did occur in a surrounding line, the result could be confusing + # or even incorrectly aligned. + + # A consequence of these rules is that we only need to consider subgroups + # with at least 3 lines and 2 alignment tokens. + + # The subgroup line index range + my ( $jbeg, $jend ); + + # Vars to keep track of the start of a current sequence of matching + # lines. + my $rtokens_match; + my $rfield_lengths_match; + my $j_match_beg; + my $j_match_end; + my $imax_match; + my $rneed_pad; + + # Vars for a line being tested + my $rtokens; + my $rfield_lengths; + my $imax; + + my $start_match = sub { + my ($jj) = @_; + $rtokens_match = $rtokens; + $rfield_lengths_match = $rfield_lengths; + $j_match_beg = $jj; + $j_match_end = $jj; + $imax_match = $imax; + $rneed_pad = []; + return; + }; + + my $add_to_match = sub { + my ($jj) = @_; + $j_match_end = $jj; + + # Keep track of any padding that would be needed for each token + for ( my $i = 0 ; $i <= $imax ; $i++ ) { + next if ( $rneed_pad->[$i] ); + my $length = $rfield_lengths->[$i]; + my $length_match = $rfield_lengths_match->[$i]; + if ( $length ne $length_match ) { $rneed_pad->[$i] = 1 } + } + }; + + my $end_match = sub { + return unless ( $j_match_end > $j_match_beg ); + my $nlines = $j_match_end - $j_match_beg + 1; + my $rhash_beg = $rline_hashes->[$j_match_beg]; + my $rhash_end = $rline_hashes->[$j_match_end]; + my @idel; + + # Do not delete unless the first token also occurs in a surrounding line + my $tok0 = $rtokens_match->[0]; + return + unless ( + ( + $j_match_beg > $jbeg + && $rnew_lines->[ $j_match_beg - 1 ]->get_rtokens()->[0] eq + $tok0 + ) + || ( $j_match_end < $jend + && $rnew_lines->[ $j_match_end + 1 ]->get_rtokens()->[0] eq + $tok0 ) + ); + + # Note that we are skipping the token at i=0 + for ( my $i = 1 ; $i <= $imax_match ; $i++ ) { + + # do not delete a token which requires padding to align + next if ( $rneed_pad->[$i] ); + + my $tok = $rtokens_match->[$i]; + + # Do not delete a token which occurs in a surrounding line + next + if ( $j_match_beg > $jbeg + && defined( $rline_hashes->[ $j_match_beg - 1 ]->{$tok} ) ); + next + if ( $j_match_end < $jend + && defined( $rline_hashes->[ $j_match_end + 1 ]->{$tok} ) ); - if (@idel) { delete_selected_tokens( $line, \@idel ) } + # ok to delete + push @idel, $i; + ##print "ok to delete tok=$tok\n"; } + if (@idel) { + foreach my $j ( $j_match_beg .. $j_match_end ) { + delete_selected_tokens( $rnew_lines->[$j], \@idel ); + } + } + }; + + foreach my $item ( @{$rsubgroups} ) { + ( $jbeg, $jend ) = @{$item}; + my $nlines = $jend - $jbeg + 1; + next unless ( $nlines > 2 ); + + for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) { + my $line = $rnew_lines->[$jj]; + $rtokens = $line->get_rtokens(); + $rfield_lengths = $line->get_rfield_lengths(); + $imax = @{$rtokens} - 2; + + # start a new match group + if ( $jj == $jbeg ) { + $start_match->($jj); + next; + } + + # see if all tokens of this line match the current group + my $match; + if ( $imax == $imax_match ) { + for ( my $i = 0 ; $i <= $imax ; $i++ ) { + my $tok = $rtokens->[$i]; + my $tok_match = $rtokens_match->[$i]; + last if ( $tok ne $tok_match ); + } + $match = 1; + } + + # yes, they all match + if ($match) { + $add_to_match->($jj); + } + + # now, this line does not match + else { + $end_match->(); + $start_match->($jj); + } + } # End loopover lines + $end_match->(); } # End loop over subgroups + return; +} ## end sub delete_null_alignments + +sub match_line_pairs { + my ( $rlines, $rnew_lines, $rsubgroups, $group_level ) = @_; + + # Compare each pair of lines and save information about common matches + # $rlines = list of lines including hanging side comments + # $rnew_lines = list of lines without any hanging side comments + # $rsubgroups = list of subgroups of the new lines + + # TODO: + # Maybe change: imax_pair => pair_match_info = ref to array + # = [$imax_align, $rMsg, ... ] + # This may eventually have multi-level match info + + # Previous line vars + my ( $line_m, $rtokens_m, $rpatterns_m, $rfield_lengths_m, $imax_m, + $list_type_m, $ci_level_m ); + + # Current line vars + my ( $line, $rtokens, $rpatterns, $rfield_lengths, $imax, $list_type, + $ci_level ); + + use constant EXPLAIN_COMPARE_PATTERNS => 0; + + my $compare_patterns = sub { + + # helper routine to decide if patterns match well enough.. + # return code: + # 0 = patterns match, continue + # 1 = no match + # 2 = no match, and lines do not match at all + + my ( $tok, $tok_m, $pat, $pat_m, $pad ) = @_; + my $GoToMsg = ""; + my $return_code = 1; + + my ( $alignment_token, $lev, $tag, $tok_count ) = + decode_alignment_token($tok); + + # We have to be very careful about aligning commas + # when the pattern's don't match, because it can be + # worse to create an alignment where none is needed + # than to omit one. Here's an example where the ','s + # are not in named containers. The first line below + # should not match the next two: + # ( $a, $b ) = ( $b, $r ); + # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 ); + # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 ); + if ( $alignment_token eq ',' ) { + + # do not align commas unless they are in named + # containers + $GoToMsg = "do not align commas in unnamed containers"; + goto NO_MATCH unless ( $tok =~ /[A-Za-z]/ ); + } + + # do not align parens unless patterns match; + # large ugly spaces can occur in math expressions. + elsif ( $alignment_token eq '(' ) { + + # But we can allow a match if the parens don't + # require any padding. + $GoToMsg = "do not align '(' unless patterns match or pad=0"; + if ( $pad != 0 ) { goto NO_MATCH } + } + + # Handle an '=' alignment with different patterns to + # the left. + elsif ( $alignment_token eq '=' ) { + + # It is best to be a little restrictive when + # aligning '=' tokens. Here is an example of + # two lines that we will not align: + # my $variable=6; + # $bb=4; + # The problem is that one is a 'my' declaration, + # and the other isn't, so they're not very similar. + # We will filter these out by comparing the first + # letter of the pattern. This is crude, but works + # well enough. + if ( substr( $pat_m, 0, 1 ) ne substr( $pat, 0, 1 ) ) { + $GoToMsg = "first character before equals differ"; + goto NO_MATCH; + } + + # The introduction of sub 'prune_alignment_tree' + # enabled alignment of lists left of the equals with + # other scalar variables. For example: + # my ( $D, $s, $e ) = @_; + # my $d = length $D; + # my $c = $e - $s - $d; + + # But this would change formatting of a lot of scripts, + # so for now we prevent alignment of comma lists on the + # left with scalars on the left. We will also prevent + # any partial alignments. + + # set return code 2 if the = is at line level, but + # set return code 1 if the = is below line level, i.e. + # sub new { my ( $p, $v ) = @_; bless \$v, $p } + # sub iter { my ($x) = @_; return undef if $$x < 0; return $$x--; } + + elsif ( + ( index( $pat_m, ',' ) >= 0 ) ne ( index( $pat, ',' ) >= 0 ) ) + { + $GoToMsg = "mixed commas/no-commas before equals"; + if ( $lev eq $group_level ) { + $return_code = 2; + } + goto NO_MATCH; + } + } + + MATCH: + return ( 0, \$GoToMsg ); + + NO_MATCH: + + EXPLAIN_COMPARE_PATTERNS + && print STDERR "no match because $GoToMsg\n"; + + return ( $return_code, \$GoToMsg ); + + }; ## end of $compare_patterns->() + + # loop over subgroups + foreach my $item ( @{$rsubgroups} ) { + my ( $jbeg, $jend ) = @{$item}; + my $nlines = $jend - $jbeg + 1; + next unless ( $nlines > 1 ); + + # loop over lines in a subgroup + for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) { + + $line_m = $line; + $rtokens_m = $rtokens; + $rpatterns_m = $rpatterns; + $rfield_lengths_m = $rfield_lengths; + $imax_m = $imax; + $list_type_m = $list_type; + $ci_level_m = $ci_level; + + $line = $rnew_lines->[$jj]; + $rtokens = $line->get_rtokens(); + $rpatterns = $line->get_rpatterns(); + $rfield_lengths = $line->get_rfield_lengths(); + $imax = @{$rtokens} - 2; + $list_type = $line->get_list_type(); + $ci_level = $line->get_ci_level(); + + # nothing to do for first line + next if ( $jj == $jbeg ); + + my $ci_jump = $ci_level - $ci_level_m; + + my $imax_min = $imax_m < $imax ? $imax_m : $imax; + + my $imax_align = -1; + + # find number of leading common tokens + + ################################# + # No match to hanging side comment + ################################# + if ( $line->get_is_hanging_side_comment() ) { + + # Should not get here; HSC's have been filtered out + $imax_align = -1; + } + + ############################## + # Handle comma-separated lists + ############################## + elsif ( $list_type && $list_type eq $list_type_m ) { + + # do not align lists across a ci jump with new list method + if ($ci_jump) { $imax_min = -1 } + + my $i_nomatch = $imax_min + 1; + for ( my $i = 0 ; $i <= $imax_min ; $i++ ) { + my $tok = $rtokens->[$i]; + my $tok_m = $rtokens_m->[$i]; + if ( $tok ne $tok_m ) { + $i_nomatch = $i; + last; + } + } + + $imax_align = $i_nomatch - 1; + } + + ################## + # Handle non-lists + ################## + else { + my $i_nomatch = $imax_min + 1; + for ( my $i = 0 ; $i <= $imax_min ; $i++ ) { + my $tok = $rtokens->[$i]; + my $tok_m = $rtokens_m->[$i]; + if ( $tok ne $tok_m ) { + $i_nomatch = $i; + last; + } + + my $pat = $rpatterns->[$i]; + my $pat_m = $rpatterns_m->[$i]; + + # If patterns don't match, we have to be careful... + if ( $pat_m ne $pat ) { + my $pad = + $rfield_lengths->[$i] - $rfield_lengths_m->[$i]; + my ( $match_code, $rmsg ) = $compare_patterns->( + $tok, $tok_m, $pat, $pat_m, $pad + ); + if ($match_code) { + if ( $match_code eq 1 ) { $i_nomatch = $i } + elsif ( $match_code eq 2 ) { $i_nomatch = 0 } + last; + } + } + } + $imax_align = $i_nomatch - 1; + } + + $line_m->set_imax_pair($imax_align); + + } ## end loop over lines + + # Put fence at end of subgroup + $line->set_imax_pair(-1); + + } ## end loop over subgroups + + # if there are hanging side comments, propagate the pair info down to them + # so that lines can just look back one line for their pair info. + if ( @{$rlines} > @{$rnew_lines} ) { + my $last_pair_info = -1; + foreach my $line ( @{$rlines} ) { + if ( $line->get_is_hanging_side_comment() ) { + $line->set_imax_pair($last_pair_info); + } + else { + $last_pair_info = $line->get_imax_pair(); + } + } + } + return; +} + +sub fat_comma_to_comma { + my ($str) = @_; + + # We are changing '=>' to ',' and removing any trailing decimal count + # because currently fat commas have a count and commas do not. + # For example, we will change '=>2+{-3.2' into ',2+{-3' + if ( $str =~ /^=>([^\.]*)/ ) { $str = ',' . $1 } + return $str; +} + +sub get_line_token_info { + + # scan lines of tokens and return summary information about the range of + # levels and patterns. + my ($rlines) = @_; + + # First scan to check monotonicity. Here is an example of several + # lines which are monotonic. The = is the lowest level, and + # the commas are all one level deeper. So this is not nonmonotonic. + # $$d{"weeks"} = [ "w", "wk", "wks", "week", "weeks" ]; + # $$d{"days"} = [ "d", "day", "days" ]; + # $$d{"hours"} = [ "h", "hr", "hrs", "hour", "hours" ]; + my @all_token_info; + my $all_monotonic = 1; + for ( my $jj = 0 ; $jj < @{$rlines} ; $jj++ ) { + my ($line) = $rlines->[$jj]; + my $rtokens = $line->get_rtokens(); + my $last_lev; + my $is_monotonic = 1; + my $i = -1; + foreach my $tok ( @{$rtokens} ) { + $i++; + my ( $raw_tok, $lev, $tag, $tok_count ) = + decode_alignment_token($tok); + push @{ $all_token_info[$jj] }, + [ $raw_tok, $lev, $tag, $tok_count ]; + last if ( $tok eq '#' ); + if ( $i > 0 && $lev < $last_lev ) { $is_monotonic = 0 } + $last_lev = $lev; + } + if ( !$is_monotonic ) { $all_monotonic = 0 } + } + + my $rline_values = []; + for ( my $jj = 0 ; $jj < @{$rlines} ; $jj++ ) { + my ($line) = $rlines->[$jj]; + + my $rtokens = $line->get_rtokens(); + my $i = -1; + my ( $lev_min, $lev_max ); + my $token_pattern_max = ""; + my %saw_level; + my @token_info; + my $is_monotonic = 1; + + # find the index of the last token before the side comment + my $imax = @{$rtokens} - 2; + my $imax_true = $imax; + + # If the entire group is monotonic, and the line ends in a comma list, + # walk it back to the first such comma. this will have the effect of + # making all trailing ragged comma lists match in the prune tree + # routine. these trailing comma lists can better be handled by later + # alignment rules. + + # Treat fat commas the same as commas here by converting them to + # commas. This will improve the chance of aligning the leading parts + # of ragged lists. + + my $tok_end = fat_comma_to_comma( $rtokens->[$imax] ); + if ( $all_monotonic && $tok_end =~ /^,/ ) { + my $i = $imax - 1; + while ( $i >= 0 + && fat_comma_to_comma( $rtokens->[$i] ) eq $tok_end ) + { + $imax = $i; + $i--; + } + } + + # make a first pass to find level range + my $last_lev; + foreach my $tok ( @{$rtokens} ) { + $i++; + last if ( $i > $imax ); + last if ( $tok eq '#' ); + my ( $raw_tok, $lev, $tag, $tok_count ) = + @{ $all_token_info[$jj]->[$i] }; + + last if ( $tok eq '#' ); + $token_pattern_max .= $tok; + $saw_level{$lev}++; + if ( !defined($lev_min) ) { + $lev_min = $lev; + $lev_max = $lev; + } + else { + if ( $lev < $lev_min ) { $lev_min = $lev; } + if ( $lev > $lev_max ) { $lev_max = $lev; } + if ( $lev < $last_lev ) { $is_monotonic = 0 } + } + $last_lev = $lev; + } + + # handle no levels + my $rtoken_patterns = {}; + my $rtoken_indexes = {}; + my @levs = sort keys %saw_level; + if ( !defined($lev_min) ) { + $lev_min = -1; + $lev_max = -1; + $levs[0] = -1; + $rtoken_patterns->{$lev_min} = ""; + $rtoken_indexes->{$lev_min} = []; + } + + # handle one level + elsif ( $lev_max == $lev_min ) { + $rtoken_patterns->{$lev_max} = $token_pattern_max; + $rtoken_indexes->{$lev_max} = [ ( 0 .. $imax ) ]; + } + + # handle multiple levels + else { + $rtoken_patterns->{$lev_max} = $token_pattern_max; + $rtoken_indexes->{$lev_max} = [ ( 0 .. $imax ) ]; + + my $debug = 0; + my $lev_top = pop @levs; # alread did max level + my $itok = -1; + foreach my $tok ( @{$rtokens} ) { + $itok++; + last if ( $itok > $imax ); + my ( $raw_tok, $lev, $tag, $tok_count ) = + @{ $all_token_info[$jj]->[$itok] }; + last if ( $raw_tok eq '#' ); + foreach my $lev_test (@levs) { + next if ( $lev > $lev_test ); + $rtoken_patterns->{$lev_test} .= $tok; + push @{ $rtoken_indexes->{$lev_test} }, $itok; + } + } + push @levs, $lev_top; + } + + push @{$rline_values}, + [ + $lev_min, $lev_max, $rtoken_patterns, \@levs, + $rtoken_indexes, $is_monotonic, $imax_true, $imax, + ]; + + # debug + 0 && do { + local $" = ')('; + print "lev_min=$lev_min, lev_max=$lev_max, levels=(@levs)\n"; + foreach my $key ( sort keys %{$rtoken_patterns} ) { + print "$key => $rtoken_patterns->{$key}\n"; + print "$key => @{$rtoken_indexes->{$key}}\n"; + } + }; + } ## end loop over lines + return ( $rline_values, $all_monotonic ); +} + +sub prune_alignment_tree { + my ($rlines) = @_; + my $jmax = @{$rlines} - 1; + return unless $jmax > 0; + + # Vertical alignment in perltidy is done as an iterative process. The + # starting point is to mark all possible alignment tokens ('=', ',', '=>', + # etc) for vertical alignment. Then we have to delete all alignments + # which, if actually made, would detract from overall alignment. This + # is done in several phases of which this is one. + + # In this routine we look at the alignments of a group of lines as a + # hierarchical tree. We will 'prune' the tree to limited depths if that + # will improve overall alignment at the lower depths. + # For each line we will be looking at its alignment patterns down to + # different fixed depths. For each depth, we include all lower depths and + # ignore all higher depths. We want to see if we can get alignment of a + # larger group of lines if we ignore alignments at some lower depth. + # Here is an # example: + + # for ( + # [ '$var', sub { join $_, "bar" }, 0, "bar" ], + # [ 'CONSTANT', sub { join "foo", "bar" }, 0, "bar" ], + # [ 'CONSTANT', sub { join "foo", "bar", 3 }, 1, "barfoo3" ], + # [ '$myvar', sub { my $var; join $var, "bar" }, 0, "bar" ], + # ); + + # In the above example, all lines have three commas at the lowest depth + # (zero), so if there were no other alignements, these lines would all + # align considering only the zero depth alignment token. But some lines + # have additional comma alignments at the next depth, so we need to decide + # if we should drop those to keep the top level alignments, or keep those + # for some additional low level alignments at the expense losing some top + # level alignments. In this case we will drop the deeper level commas to + # keep the entire collection aligned. But in some cases the decision could + # go the other way. + + # The tree for this example at the zero depth has one node containing + # all four lines, since they are identical at zero level (three commas). + # At depth one, there are three 'children' nodes, namely: + # - lines 1 and 2, which have a single comma in the 'sub' at depth 1 + # - line 3, which has 2 commas at depth 1 + # - line4, which has a ';' and a ',' at depth 1 + # There are no deeper alignments in this example. + # so the tree structure for this example is: + # + # depth 0 depth 1 depth 2 + # [lines 1-4] -- [line 1-2] - (empty) + # | [line 3] - (empty) + # | [line 4] - (empty) + + # We can carry this to any depth, but it is not really useful to go below + # depth 2. To cleanly stop there, we will consider depth 2 to contain all + # alignments at depth >=2. + + use constant EXPLAIN_PRUNE => 0; + + #################################################################### + # Prune Tree Step 1. Start by scanning the lines and collecting info + #################################################################### + + # Note that the caller had this info but we have to redo this now because + # alignment tokens may have been deleted. + my ( $rline_values, $all_monotonic ) = get_line_token_info($rlines); + + # If all the lines have levels which increase monotonically from left to + # right, then the sweep-left-to-right pass can do a better job of alignment + # than pruning, and without deleting alignments. + return if ($all_monotonic); + + # Contents of $rline_values + # [ + # $lev_min, $lev_max, $rtoken_patterns, \@levs, + # $rtoken_indexes, $is_monotonic, $imax_true, $imax, + # ]; + + # We can work to any depth, but there is little advantage to working + # to a a depth greater than 2 + my $MAX_DEPTH = 2; + + # This arrays will hold the tree of alignment tokens at different depths + # for these lines. + my @match_tree; + + # Tree nodes contain these values: + # $match_tree[$depth] = [$jbeg, $jend, $n_parent, $level, $pattern, + # $nc_beg_p, $nc_end_p, $rindexes]; + # where + # $depth = 0,1,2 = index of depth of the match + + # $jbeg beginning index j of the range of lines in this match + # $jend ending index j of the range of lines in this match + # $n_parent = index of the containing group at $depth-1, if it exists + # $level = actual level of code being matched in this group + # $pattern = alignment pattern being matched + # $nc_beg_p = first child + # $nc_end_p = last child + # $rindexes = ref to token indexes + + # the patterns and levels of the current group being formed at each depth + my ( @token_patterns_current, @levels_current, @token_indexes_current ); + + # the patterns and levels of the next line being tested at each depth + my ( @token_patterns_next, @levels_next, @token_indexes_next ); + + ######################################################### + # define a recursive worker subroutine for tree construction + ######################################################### + + # This is a recursive routine which is called if a match condition changes + # at any depth when a new line is encountered. It ends the match node + # which changed plus all deeper nodes attached to it. + my $end_node; + $end_node = sub { + my ( $depth, $jl, $n_parent ) = @_; + + # $depth is the tree depth + # $jl is the index of the line + # $n_parent is index of the parent node of this node + + return if ( $depth > $MAX_DEPTH ); + + # end any current group at this depth + if ( $jl >= 0 + && defined( $match_tree[$depth] ) + && @{ $match_tree[$depth] } + && defined( $levels_current[$depth] ) ) + { + $match_tree[$depth]->[-1]->[1] = $jl; + } + + # Define the index of the node we will create below + my $ng_self = 0; + if ( defined( $match_tree[$depth] ) ) { + $ng_self = @{ $match_tree[$depth] }; + } + + # end any next deeper child node(s) + $end_node->( $depth + 1, $jl, $ng_self ); + + # update the levels being matched + $token_patterns_current[$depth] = $token_patterns_next[$depth]; + $token_indexes_current[$depth] = $token_indexes_next[$depth]; + $levels_current[$depth] = $levels_next[$depth]; + + # Do not start a new group at this level if it is not being used + if ( !defined( $levels_next[$depth] ) + || $depth > 0 + && $levels_next[$depth] <= $levels_next[ $depth - 1 ] ) + { + return; + } + + # Create a node for the next group at this depth. We initially assume + # that it will continue to $jmax, and correct that later if the node + # ends earlier. + push @{ $match_tree[$depth] }, + [ + $jl + 1, $jmax, $n_parent, $levels_current[$depth], + $token_patterns_current[$depth], + undef, undef, $token_indexes_current[$depth], + ]; + + return; + }; ## end sub end_node + + ###################################################### + # Prune Tree Step 2. Loop to form the tree of matches. + ###################################################### + for ( my $jp = 0 ; $jp <= $jmax ; $jp++ ) { + + # working with two adjacent line indexes, 'm'=minus, 'p'=plus + my $jm = $jp - 1; + + # Pull out needed values for the next line + my ( $lev_min, $lev_max, $rtoken_patterns, $rlevs, $rtoken_indexes, + $is_monotonic, $imax_true, $imax ) + = @{ $rline_values->[$jp] }; + + # Transfer levels and patterns for this line to the working arrays. + # If the number of levels differs from our chosen MAX_DEPTH ... + # if fewer than MAX_DEPTH: leave levels at missing depths undefined + # if more than MAX_DEPTH: set the MAX_DEPTH level to be the maximum + @levels_next = @{$rlevs}[ 0 .. $MAX_DEPTH ]; + if ( @{$rlevs} > $MAX_DEPTH ) { + $levels_next[$MAX_DEPTH] = $rlevs->[-1]; + } + my $depth = 0; + foreach (@levels_next) { + $token_patterns_next[$depth] = + defined($_) ? $rtoken_patterns->{$_} : undef; + $token_indexes_next[$depth] = + defined($_) ? $rtoken_indexes->{$_} : undef; + $depth++; + } + + # Look for a change in match groups... + + # Initialize on the first line + if ( $jp == 0 ) { + my $n_parent; + $end_node->( 0, $jm, $n_parent ); + } + + # End groups if a hard flag has been set + elsif ( $rlines->[$jm]->get_end_group() ) { + my $n_parent; + $end_node->( 0, $jm, $n_parent ); + } + + # Continue at hanging side comment + elsif ( $rlines->[$jp]->get_is_hanging_side_comment() ) { + next; + } + # Otherwise see if anything changed and update the tree if so + else { + foreach my $depth ( 0 .. $MAX_DEPTH ) { + + my $def_current = defined( $token_patterns_current[$depth] ); + my $def_next = defined( $token_patterns_next[$depth] ); + last unless ( $def_current || $def_next ); + if ( !$def_current + || !$def_next + || $token_patterns_current[$depth] ne + $token_patterns_next[$depth] ) + { + my $n_parent; + if ( $depth > 0 && defined( $match_tree[ $depth - 1 ] ) ) { + $n_parent = @{ $match_tree[ $depth - 1 ] } - 1; + } + $end_node->( $depth, $jm, $n_parent ); + last; + } + } + } + } ## end loop to form tree of matches + + ########################################################## + # Prune Tree Step 3. Make links from parent to child nodes + ########################################################## + + # It seemed cleaner to do this as a separate step rather than during tree + # construction. The children nodes have links up to the parent node which + # created them. Now make links in the opposite direction, so the parents + # can find the children. We store the range of children nodes ($nc_beg, + # $nc_end) of each parent with two additional indexes in the orignal array. + # These will be undef if no children. + for ( my $depth = $MAX_DEPTH ; $depth > 0 ; $depth-- ) { + next unless defined( $match_tree[$depth] ); + my $nc_max = @{ $match_tree[$depth] } - 1; + my $np_now; + foreach my $nc ( 0 .. $nc_max ) { + my $np = $match_tree[$depth]->[$nc]->[2]; + if ( !defined($np) ) { + + # shouldn't happen + #print STDERR "lost child $np at depth $depth\n"; + next; + } + if ( !defined($np_now) || $np != $np_now ) { + $np_now = $np; + $match_tree[ $depth - 1 ]->[$np]->[5] = $nc; + } + $match_tree[ $depth - 1 ]->[$np]->[6] = $nc; + } + } ## end loop to make links down to the child nodes + + EXPLAIN_PRUNE > 0 && do { + print "Tree complete. Found these groups:\n"; + foreach my $depth ( 0 .. $MAX_DEPTH ) { + Dump_tree_groups( \@{ $match_tree[$depth] }, "depth=$depth" ); + } + }; + + ####################################################### + # Prune Tree Step 4. Make a list of nodes to be deleted + ####################################################### + + # list of lines with tokens to be deleted: + # [$jbeg, $jend, $level_keep] + # $jbeg..$jend is the range of line indexes, + # $level_keep is the minimum level to keep + my @delete_list; + + # Groups with ending comma lists and their range of sizes: + # $ragged_comma_group{$id} = [ imax_group_min, imax_group_max ] + my %ragged_comma_group; + + # Define a threshold line count for forcing a break + my $nlines_break = 3; + + # We work with a list of nodes to visit at the next deeper depth. + my @todo_list; + if ( defined( $match_tree[0] ) ) { + @todo_list = ( 0 .. @{ $match_tree[0] } - 1 ); + } + + for ( my $depth = 0 ; $depth <= $MAX_DEPTH ; $depth++ ) { + last unless (@todo_list); + my @todo_next; + foreach my $np (@todo_list) { + my ( $jbeg_p, $jend_p, $np_p, $lev_p, $pat_p, $nc_beg_p, $nc_end_p, + $rindexes_p ) + = @{ $match_tree[$depth]->[$np] }; + my $nlines_p = $jend_p - $jbeg_p + 1; + + # nothing to do if no children + next unless defined($nc_beg_p); + + # Define the number of lines to either keep or delete a child node. + # This is the key decision we have to make. We want to delete + # short runs of matched lines, and keep long runs. It seems easier + # for the eye to follow breaks in monotonic level changes than + # non-monotonic level changes. For example, the following looks + # best if we delete the lower level alignments: + + # [1] ~~ []; + # [ ["foo"], ["bar"] ] ~~ [ qr/o/, qr/a/ ]; + # [ qr/o/, qr/a/ ] ~~ [ ["foo"], ["bar"] ]; + # [ "foo", "bar" ] ~~ [ qr/o/, qr/a/ ]; + # [ qr/o/, qr/a/ ] ~~ [ "foo", "bar" ]; + # $deep1 ~~ $deep1; + + # So we will use two thresholds. + my $nmin_mono = $depth + 2; + my $nmin_non_mono = $depth + 6; + if ( $nmin_mono > $nlines_p - 1 ) { + $nmin_mono = $nlines_p - 1; + } + if ( $nmin_non_mono > $nlines_p - 1 ) { + $nmin_non_mono = $nlines_p - 1; + } + + # loop to keep or delete each child node + foreach my $nc ( $nc_beg_p .. $nc_end_p ) { + my ( $jbeg_c, $jend_c, $np_c, $lev_c, $pat_c, $nc_beg_c, + $nc_end_c ) + = @{ $match_tree[ $depth + 1 ]->[$nc] }; + my $nlines_c = $jend_c - $jbeg_c + 1; + my $is_monotonic = $rline_values->[$jbeg_c]->[5]; + my $nmin = $is_monotonic ? $nmin_mono : $nmin_non_mono; + if ( $nlines_c < $nmin ) { +##print "deleting child, nlines=$nlines_c, nmin=$nmin\n"; + push @delete_list, [ $jbeg_c, $jend_c, $lev_p ]; + } + else { +##print "keeping child, nlines=$nlines_c, nmin=$nmin\n"; + push @todo_next, $nc; + } + } + } + @todo_list = @todo_next; + } ## end loop to mark nodes to delete + + ############################################################# + # Prune Tree Step 5. Loop to delete selected alignment tokens + ############################################################# + foreach my $item (@delete_list) { + my ( $jbeg, $jend, $level_keep ) = @{$item}; + foreach my $jj ( $jbeg .. $jend ) { + my $line = $rlines->[$jj]; + my @idel; + my $rtokens = $line->get_rtokens(); + my $imax = @{$rtokens} - 2; + for ( my $i = 0 ; $i <= $imax ; $i++ ) { + my $tok = $rtokens->[$i]; + my ( $raw_tok, $lev, $tag, $tok_count ) = + decode_alignment_token($tok); + if ( $lev > $level_keep ) { + push @idel, $i; + } + } + if (@idel) { + delete_selected_tokens( $line, \@idel ); + } + } + } ## end loop to delete selected alignment tokens + + return; +} ## end sub prune_alignment_tree + +sub Dump_tree_groups { + my ( $rgroup, $msg ) = @_; + print "$msg\n"; + local $" = ')('; + foreach my $item ( @{$rgroup} ) { + my @fix = @{$item}; + foreach (@fix) { $_ = "undef" unless defined $_; } + $fix[4] = "..."; + print "(@fix)\n"; + } return; } -{ # decide_if_aligned_pair +{ ## closure for sub is_marginal_match my %is_if_or; my %is_assignment; + my %is_good_alignment; + + # This test did not give sufficiently better results to use as an update, + # but the flag is worth keeping as a starting point for future testing. + use constant TEST_MARGINAL_EQ_ALIGNMENT => 0; BEGIN { my @q = qw( - if or || + if unless or || ); @is_if_or{@q} = (1) x scalar(@q); @@ -2567,59 +3836,165 @@ sub delete_unmatched_tokens { x= ); @is_assignment{@q} = (1) x scalar(@q); + + # Vertically aligning on certain "good" tokens is usually okay + # so we can be less restrictive in marginal cases. + @q = qw( { ? => = ); + push @q, (','); + @is_good_alignment{@q} = (1) x scalar(@q); } - sub decide_if_aligned_pair { + sub is_marginal_match { - # Do not try to align two lines which are not really similar - return unless ( @group_lines == 2 ); - return if ($is_matching_terminal_line); + my ( $line_0, $line_1, $group_level, $imax_align, $imax_prev ) = @_; - # always align lists - my $group_list_type = $group_lines[0]->get_list_type(); - return 0 if ($group_list_type); + # Decide if we should undo some or all of the common alignments of a + # group of just two lines. + + # Given: + # $line_0 and $line_1 - the two lines + # $group_level = the indentation level of the group being processed + # $imax_align = the maximum index of the common alignment tokens + # of the two lines + # $imax_prev = the maximum index of the common alignment tokens + # with the line before $line_0 (=-1 of does not exist) + + # Return: + # $is_marginal = true if the two lines should NOT be fully aligned + # = false if the two lines can remain fully aligned + # $imax_align = the index of the highest alignment token shared by + # these two lines to keep if the match is marginal. - my $jmax0 = $group_lines[0]->get_jmax(); - my $jmax1 = $group_lines[1]->get_jmax(); - my $rtokens = $group_lines[0]->get_rtokens(); - my $leading_equals = ( $rtokens->[0] =~ /=/ ); + # When we have an alignment group of just two lines like this, we are + # working in the twilight zone of what looks good and what looks bad. + # This routine is a collection of rules which work have been found to + # work fairly well, but it will need to be updated from time to time. - # scan the tokens on the second line - my $rtokens1 = $group_lines[1]->get_rtokens(); - my $saw_if_or; # if we saw an 'if' or 'or' at group level + my $is_marginal = 0; + + # always keep alignments of a terminal else or ternary + goto RETURN if ( defined( $line_1->get_j_terminal_match() ) ); + + # always align lists + my $group_list_type = $line_0->get_list_type(); + goto RETURN if ($group_list_type); + + # always align hanging side comments + my $is_hanging_side_comment = $line_1->get_is_hanging_side_comment(); + goto RETURN if ($is_hanging_side_comment); + + my $jmax_0 = $line_0->get_jmax(); + my $jmax_1 = $line_1->get_jmax(); + my $rtokens_1 = $line_1->get_rtokens(); + my $rtokens_0 = $line_0->get_rtokens(); + my $rfield_lengths_0 = $line_0->get_rfield_lengths(); + my $rfield_lengths_1 = $line_1->get_rfield_lengths(); + my $rpatterns_0 = $line_0->get_rpatterns(); + my $rpatterns_1 = $line_1->get_rpatterns(); + my $imax_next = $line_1->get_imax_pair(); + + # We will scan the alignment tokens and set a flag '$is_marginal' if + # it seems that the an alignment would look bad. + my $max_pad = 0; + my $saw_good_alignment = 0; + my $saw_if_or; # if we saw an 'if' or 'or' at group level my $raw_tokb = ""; # first token seen at group level - for ( my $j = 0 ; $j < $jmax1 - 1 ; $j++ ) { + my $jfirst_bad; + my $line_ending_fat_comma; # is last token just a '=>' ? + my $j0_eq_pad; + my $j0_max_pad = 0; + + for ( my $j = 0 ; $j < $jmax_1 - 1 ; $j++ ) { my ( $raw_tok, $lev, $tag, $tok_count ) = - decode_alignment_token( $rtokens1->[$j] ); + decode_alignment_token( $rtokens_1->[$j] ); if ( $raw_tok && $lev == $group_level ) { if ( !$raw_tokb ) { $raw_tokb = $raw_tok } $saw_if_or ||= $is_if_or{$raw_tok}; } - } - # A marginal match is a match which has different patterns. Normally, - # we should not allow exactly two lines to match if marginal. But - # we can allow matching in some specific cases. - my $is_marginal = $marginal_match; + # When the first of the two lines ends in a bare '=>' this will + # probably be marginal match. (For a bare =>, the next field length + # will be 2 or 3, depending on side comment) + $line_ending_fat_comma = + $j == $jmax_1 - 2 + && $raw_tok eq '=>' + && $rfield_lengths_0->[ $j + 1 ] <= 3; + + my $pad = $rfield_lengths_1->[$j] - $rfield_lengths_0->[$j]; + if ( $j == 0 ) { + $pad += $line_1->get_leading_space_count() - + $line_0->get_leading_space_count(); + + # Remember the pad at a leading equals + if ( $raw_tok eq '=' && $lev == $group_level ) { + $j0_eq_pad = $pad; + $j0_max_pad = + 0.5 * ( $rfield_lengths_1->[0] + $rfield_lengths_0->[0] ); + $j0_max_pad = 4 if ( $j0_max_pad < 4 ); + } + } - # lines with differing number of alignment tokens are marginal - $is_marginal ||= - $previous_maximum_jmax_seen != $previous_minimum_jmax_seen - && !$is_assignment{$raw_tokb}; + if ( $pad < 0 ) { $pad = -$pad } + if ( $pad > $max_pad ) { $max_pad = $pad } + if ( $is_good_alignment{$raw_tok} && !$line_ending_fat_comma ) { + $saw_good_alignment = 1; + } + else { + $jfirst_bad = $j unless defined($jfirst_bad); + } + if ( $rpatterns_0->[$j] ne $rpatterns_1->[$j] ) { + + # Flag this as a marginal match since patterns differ. + # Normally, we will not allow just two lines to match if + # marginal. But we can allow matching in some specific cases. + + $jfirst_bad = $j if ( !defined($jfirst_bad) ); + $is_marginal = 1 if ( $is_marginal == 0 ); + if ( $raw_tok eq '=' ) { + + # Here is an example of a marginal match: + # $done{$$op} = 1; + # $op = compile_bblock($op); + # The left tokens are both identifiers, but + # one accesses a hash and the other doesn't. + # We'll let this be a tentative match and undo + # it later if we don't find more than 2 lines + # in the group. + $is_marginal = 2; + } + } + } + + $is_marginal = 1 if ( $is_marginal == 0 && $line_ending_fat_comma ); + + # Turn off the "marginal match" flag in some cases... + # A "marginal match" occurs when the alignment tokens agree + # but there are differences in the other tokens (patterns). + # If we leave the marginal match flag set, then the rule is that we + # will align only if there are more than two lines in the group. + # We will turn of the flag if we almost have a match + # and either we have seen a good alignment token or we + # just need a small pad (2 spaces) to fit. These rules are + # the result of experimentation. Tokens which misaligned by just + # one or two characters are annoying. On the other hand, + # large gaps to less important alignment tokens are also annoying. + if ( $is_marginal == 1 + && ( $saw_good_alignment || $max_pad < 3 ) ) + { + $is_marginal = 0; + } # We will use the line endings to help decide on alignments... # See if the lines end with semicolons... - my $rpatterns0 = $group_lines[0]->get_rpatterns(); - my $rpatterns1 = $group_lines[1]->get_rpatterns(); my $sc_term0; my $sc_term1; - if ( $jmax0 < 1 || $jmax1 < 1 ) { + if ( $jmax_0 < 1 || $jmax_1 < 1 ) { # shouldn't happen } else { - my $pat0 = $rpatterns0->[ $jmax0 - 1 ]; - my $pat1 = $rpatterns1->[ $jmax1 - 1 ]; + my $pat0 = $rpatterns_0->[ $jmax_0 - 1 ]; + my $pat1 = $rpatterns_1->[ $jmax_1 - 1 ]; $sc_term0 = $pat0 =~ /;b?$/; $sc_term1 = $pat1 =~ /;b?$/; } @@ -2641,200 +4016,498 @@ sub delete_unmatched_tokens { # grep { /$handles/ } $self->_get_delegate_method_list; $is_marginal ||= ( $raw_tokb eq '(' || $raw_tokb eq '{' ) - && $jmax1 == 2 + && $jmax_1 == 2 && $sc_term0 ne $sc_term1; + ######################################## + # return unless this is a marginal match + ######################################## + goto RETURN if ( !$is_marginal ); + # Undo the marginal match flag in certain cases, - if ($is_marginal) { - - # Two lines with a leading equals-like operator are allowed to - # align if the patterns to the left of the equals are the same. - # For example the following two lines are a marginal match but have - # the same left side patterns, so we will align the equals. - # my $orig = my $format = "^<<<<< ~~\n"; - # my $abc = "abc"; - # But these have a different left pattern so they will not be - # aligned - # $xmldoc .= $`; - # $self->{'leftovers'} .= "[0]; - my $pat1 = $rpatterns1->[0]; - - ########################################################## - # Turn off the marginal flag for some types of assignments - ########################################################## - if ( $is_assignment{$raw_tokb} ) { - # undo marginal flag if first line is semicolon terminated - # and leading patters match - if ($sc_term0) { # && $sc_term1) { - $is_marginal = $pat0 ne $pat1; - } + # Two lines with a leading equals-like operator are allowed to + # align if the patterns to the left of the equals are the same. + # For example the following two lines are a marginal match but have + # the same left side patterns, so we will align the equals. + # my $orig = my $format = "^<<<<< ~~\n"; + # my $abc = "abc"; + # But these have a different left pattern so they will not be + # aligned + # $xmldoc .= $`; + # $self->{'leftovers'} .= "[0]; + my $pat1 = $rpatterns_1->[0]; + + ########################################################## + # Turn off the marginal flag for some types of assignments + ########################################################## + if ( $is_assignment{$raw_tokb} ) { + + # undo marginal flag if first line is semicolon terminated + # and leading patters match + if ($sc_term0) { # && $sc_term1) { + $is_marginal = $pat0 ne $pat1; } - elsif ( $raw_tokb eq '=>' ) { + } + elsif ( $raw_tokb eq '=>' ) { + + # undo marginal flag if patterns match + $is_marginal = $pat0 ne $pat1 || $line_ending_fat_comma; + } + elsif ( $raw_tokb eq '=~' ) { - # undo marginal flag if patterns match + # undo marginal flag if both lines are semicolon terminated + # and leading patters match + if ( $sc_term1 && $sc_term0 ) { $is_marginal = $pat0 ne $pat1; } - elsif ( $raw_tokb eq '=~' ) { + } - # undo marginal flag if both lines are semicolon terminated - # and leading patters match - if ( $sc_term1 && $sc_term0 ) { - $is_marginal = $pat0 ne $pat1; - } + ###################################################### + # Turn off the marginal flag if we saw an 'if' or 'or' + ###################################################### + + # A trailing 'if' and 'or' often gives a good alignment + # For example, we can align these: + # return -1 if $_[0] =~ m/^CHAPT|APPENDIX/; + # return $1 + 0 if $_[0] =~ m/^SECT(\d*)$/; + + # or + # $d_in_m[2] = 29 if ( &Date_LeapYear($y) ); + # $d = $d_in_m[$m] if ( $d > $d_in_m[$m] ); + + if ($saw_if_or) { + + # undo marginal flag if both lines are semicolon terminated + if ( $sc_term0 && $sc_term1 ) { + $is_marginal = 0; } + } + + # For a marginal match, only keep matches before the first 'bad' match + if ( $is_marginal + && defined($jfirst_bad) + && $imax_align > $jfirst_bad - 1 ) + { + $imax_align = $jfirst_bad - 1; + } - ###################################################### - # Turn off the marginal flag if we saw an 'if' or 'or' - ###################################################### + ########################################################### + # Allow sweep to match lines with leading '=' in some cases + ########################################################### + if ( $imax_align < 0 && defined($j0_eq_pad) ) { + + if ( - # A trailing 'if' and 'or' often gives a good alignment - # For example, we can align these: - # return -1 if $_[0] =~ m/^CHAPT|APPENDIX/; - # return $1 + 0 if $_[0] =~ m/^SECT(\d*)$/; + # If there is a following line with leading equals, or + # preceding line with leading equals, then let the sweep align + # them without restriction. For example, the first two lines + # here are a marginal match, but they are followed by a line + # with leading equals, so the sweep-lr logic can align all of + # the lines: + + # $date[1] = $month_to_num{ $date[1] }; # <--line_0 + # @xdate = split( /[:\/\s]/, $log->field('t') ); # <--line_1 + # $day = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] ); + # $time = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] ); + + # Likewise, if we reverse the two pairs we want the same result + + # $day = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] ); + # $time = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] ); + # $date[1] = $month_to_num{ $date[1] }; # <--line_0 + # @xdate = split( /[:\/\s]/, $log->field('t') ); # <--line_1 + + ( + $imax_next >= 0 + || $imax_prev >= 0 + || TEST_MARGINAL_EQ_ALIGNMENT + ) + && $j0_eq_pad >= -$j0_max_pad + && $j0_eq_pad <= $j0_max_pad + ) + { - # or - # $d_in_m[2] = 29 if ( &Date_LeapYear($y) ); - # $d = $d_in_m[$m] if ( $d > $d_in_m[$m] ); + # But do not do this if there is a comma before the '='. + # For example, the first two lines below have commas and + # therefore are not allowed to align with lines 3 & 4: - if ($saw_if_or) { + # my ( $x, $y ) = $self->Size(); #<--line_0 + # my ( $left, $top, $right, $bottom ) = $self->Window(); #<--l_1 + # my $vx = $right - $left; + # my $vy = $bottom - $top; - # undo marginal flag if both lines are semicolon terminated - if ( $sc_term0 && $sc_term1 ) { - $is_marginal = 0; + if ( $rpatterns_0->[0] !~ /,/ && $rpatterns_1->[0] !~ /,/ ) { + $imax_align = 0; } } } - ############################### - # Set the return flag: - # Don't align if still marginal - ############################### - my $do_not_align = $is_marginal; + RETURN: + return ( $is_marginal, $imax_align ); + } +} + +sub get_extra_leading_spaces { - # But try to convert them into a simple comment group if the first line - # a has side comment - my $rfields = $group_lines[0]->get_rfields(); - my $maximum_field_index = $group_lines[0]->get_jmax(); - if ( $do_not_align - && ( length( $rfields->[$maximum_field_index] ) > 0 ) ) - { - combine_fields(); - $do_not_align = 0; + my ( $rlines, $rgroups ) = @_; + + #---------------------------------------------------------- + # Define any extra indentation space (for the -lp option). + # Here is why: + # If a list has side comments, sub scan_list must dump the + # list before it sees everything. When this happens, it sets + # the indentation to the standard scheme, but notes how + # many spaces it would have liked to use. We may be able + # to recover that space here in the event that all of the + # lines of a list are back together again. + #---------------------------------------------------------- + + return 0 unless ( @{$rlines} && @{$rgroups} ); + + my $object = $rlines->[0]->get_indentation(); + return 0 unless ( ref($object) ); + my $extra_leading_spaces = 0; + my $extra_indentation_spaces_wanted = get_recoverable_spaces($object); + return ($extra_leading_spaces) unless ($extra_indentation_spaces_wanted); + + my $min_spaces = $extra_indentation_spaces_wanted; + if ( $min_spaces > 0 ) { $min_spaces = 0 } + + # loop over all groups + my $ng = -1; + my $ngroups = @{$rgroups}; + foreach my $item ( @{$rgroups} ) { + $ng++; + my ( $jbeg, $jend ) = @{$item}; + foreach my $j ( $jbeg .. $jend ) { + next if ( $j == 0 ); + + # all indentation objects must be the same + if ( $object != $rlines->[$j]->get_indentation() ) { + return 0; + } + } + + # find the maximum space without exceeding the line length for this group + my $avail = $rlines->[$jbeg]->get_available_space_on_right(); + my $spaces = + ( $avail > $extra_indentation_spaces_wanted ) + ? $extra_indentation_spaces_wanted + : $avail; + + ######################################################### + # Note: min spaces can be negative; for example with -gnu + # f( + # do { 1; !!(my $x = bless []); } + # ); + ######################################################### + # The following rule is needed to match older formatting: + # For multiple groups, we will keep spaces non-negative. + # For a single group, we will allow a negative space. + if ( $ngroups > 1 && $spaces < 0 ) { $spaces = 0 } + + # update the minimum spacing + if ( $ng == 0 || $spaces < $extra_leading_spaces ) { + $extra_leading_spaces = $spaces; } - return $do_not_align; } + + # update the indentation object because with -icp the terminal + # ');' will use the same adjustment. + $object->permanently_decrease_available_spaces( -$extra_leading_spaces ); + return $extra_leading_spaces; +} + +sub forget_side_comment { + my ($self) = @_; + $self->[_last_side_comment_column_] = 0; + return; } -sub adjust_side_comment { +sub is_good_side_comment_column { + my ( $self, $line, $line_number, $level, $num5 ) = @_; + + # Upon encountering the first side comment of a group, decide if + # a previous side comment should be forgotten. This involves + # checking several rules. + + # Return true to keep old comment location + # Return false to forget old comment location + + my $rfields = $line->get_rfields(); + my $is_hanging_side_comment = $line->get_is_hanging_side_comment(); + + # RULE1: Never forget comment before a hanging side comment + goto KEEP if ($is_hanging_side_comment); + + # RULE2: Forget a side comment after a short line difference, + # where 'short line difference' is computed from a formula. + # Using a smooth formula helps minimize sudden large changes. + my $line_diff = $line_number - $self->[_last_side_comment_line_number_]; + my $alev_diff = abs( $level - $self->[_last_side_comment_level_] ); + + # '$num5' is the number of comments in the first 5 lines after the first + # comment. It is needed to keep a compact group of side comments from + # being influenced by a more distant side comment. + $num5 = 1 unless ($num5); + + # Some values: + + # $adiff $num5 $short_diff + # 0 * 12 + # 1 1 6 + # 1 2 4 + # 1 3 3 + # 1 4 2 + # 2 1 4 + # 2 2 2 + # 2 3 1 + # 3 1 3 + # 3 2 1 + + my $short_diff = SC_LONG_LINE_DIFF / ( 1 + $alev_diff * $num5 ); + + goto FORGET + if ( $line_diff > $short_diff ); + + # RULE3: Forget a side comment if this line is at lower level and + # ends a block + my $last_sc_level = $self->[_last_side_comment_level_]; + goto FORGET + if ( $level < $last_sc_level + && $is_closing_block_type{ substr( $rfields->[0], 0, 1 ) } ); + + # RULE 4: Forget the last side comment if this comment might join a cached + # line ... + if ( my $cached_line_type = get_cached_line_type() ) { + + # ... otherwise side comment alignment will get messed up. + # For example, in the following test script + # with using 'perltidy -sct -act=2', the last comment would try to + # align with the previous and then be in the wrong column when + # the lines are combined: + + # foreach $line ( + # [0, 1, 2], [3, 4, 5], [6, 7, 8], # rows + # [0, 3, 6], [1, 4, 7], [2, 5, 8], # columns + # [0, 4, 8], [2, 4, 6] + # ) # diagonals + goto FORGET + if ( $cached_line_type == 2 || $cached_line_type == 4 ); + } + + # Otherwise, keep it alive + goto KEEP; - my $do_not_align = shift; + FORGET: + return 0; - # let's see if we can move the side comment field out a little - # to improve readability (the last field is always a side comment field) - my $have_side_comment = 0; - my $first_side_comment_line = -1; - my $maximum_field_index = $group_lines[0]->get_jmax(); - my $i = 0; - foreach my $line (@group_lines) { - if ( length( $line->get_rfields()->[$maximum_field_index] ) ) { - $have_side_comment = 1; - $first_side_comment_line = $i; - last; + KEEP: + return 1; +} + +sub align_side_comments { + + my ( $self, $rlines, $rgroups ) = @_; + + # Align any side comments in this batch of lines + + # Given: + # $rlines - the lines + # $rgroups - the partition of the lines into groups + # + # We will be working group-by-group because all side comments + # (real or fake) in each group are already aligned. So we just have + # to make alignments between groups wherever possible. + + # An unusual aspect is that within each group we have aligned both real + # and fake side comments. This has the consequence that the lengths of + # long lines without real side comments can cause 'push' all side comments + # to the right. This seems unusual, but testing with and without this + # feature shows that it is usually better this way. Othewise, side + # comments can be hidden between long lines without side comments and + # thus be harder to read. + + my $group_level = $self->[_group_level_]; + my $continuing_sc_flow = $self->[_last_side_comment_length_] > 0 + && $group_level == $self->[_last_level_written_]; + + # Find groups with side comments, and remember the first nonblank comment + my $j_sc_beg; + my @todo; + my $ng = -1; + foreach my $item ( @{$rgroups} ) { + $ng++; + my ( $jbeg, $jend ) = @{$item}; + foreach my $j ( $jbeg .. $jend ) { + my $line = $rlines->[$j]; + my $jmax = $line->get_jmax(); + if ( $line->get_rfield_lengths()->[$jmax] ) { + + # this group has a line with a side comment + push @todo, $ng; + if ( !defined($j_sc_beg) ) { + $j_sc_beg = $j; + } + last; + } } - $i++; } - my $kmax = $maximum_field_index + 1; + # done if no groups with side comments + return unless @todo; + + # Count $num5 = number of comments in the 5 lines after the first comment + # This is an important factor in a decision formula + my $num5 = 1; + for ( my $jj = $j_sc_beg + 1 ; $jj < @{$rlines} ; $jj++ ) { + my $ldiff = $jj - $j_sc_beg; + last if ( $ldiff > 5 ); + my $line = $rlines->[$jj]; + my $jmax = $line->get_jmax(); + my $sc_len = $line->get_rfield_lengths()->[$jmax]; + next unless ($sc_len); + $num5++; + } + + # Forget the old side comment location if necessary + my $line = $rlines->[$j_sc_beg]; + my $lnum = + $j_sc_beg + $self->[_file_writer_object_]->get_output_line_number(); + my $keep_it = + $self->is_good_side_comment_column( $line, $lnum, $group_level, $num5 ); + my $last_side_comment_column = + $keep_it ? $self->[_last_side_comment_column_] : 0; + + # If there are multiple groups we will do two passes + # so that we can find a common alignment for all groups. + my $MAX_PASS = @todo > 1 ? 2 : 1; + + # Loop over passes + my $max_comment_column = $last_side_comment_column; + for ( my $PASS = 1 ; $PASS <= $MAX_PASS ; $PASS++ ) { + + # If there are two passes, then on the last pass make the old column + # equal to the largest of the group. This will result in the comments + # being aligned if possible. + if ( $PASS == $MAX_PASS ) { + $last_side_comment_column = $max_comment_column; + } - if ($have_side_comment) { + # Loop over the groups with side comments + my $column_limit; + foreach my $ng (@todo) { + my ( $jbeg, $jend ) = @{ $rgroups->[$ng] }; - my $line = $group_lines[0]; + # Note that since all lines in a group have common alignments, we + # just have to work on one of the lines (the first line). + my $line = $rlines->[$jbeg]; + my $jmax = $line->get_jmax(); + my $is_hanging_side_comment = $line->get_is_hanging_side_comment(); + last + if ( $PASS < $MAX_PASS && $is_hanging_side_comment ); - # the maximum space without exceeding the line length: - my $avail = $line->get_available_space_on_right(); + # the maximum space without exceeding the line length: + my $avail = $line->get_available_space_on_right(); - # try to use the previous comment column - my $side_comment_column = $line->get_column( $kmax - 2 ); - my $move = $last_comment_column - $side_comment_column; + # try to use the previous comment column + my $side_comment_column = $line->get_column( $jmax - 1 ); + my $move = $last_side_comment_column - $side_comment_column; -## my $sc_line0 = $side_comment_history[0]->[0]; -## my $sc_col0 = $side_comment_history[0]->[1]; -## my $sc_line1 = $side_comment_history[1]->[0]; -## my $sc_col1 = $side_comment_history[1]->[1]; -## my $sc_line2 = $side_comment_history[2]->[0]; -## my $sc_col2 = $side_comment_history[2]->[1]; -## -## # FUTURE UPDATES: -## # Be sure to ignore 'do not align' and '} # end comments' -## # Find first $move > 0 and $move <= $avail as follows: -## # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12 -## # 2. try sc_col2 if (line-sc_line2) < 12 -## # 3. try min possible space, plus up to 8, -## # 4. try min possible space + # Remember the maximum possible column of the first line with + # side comment + if ( !defined($column_limit) ) { + $column_limit = $side_comment_column + $avail; + } - if ( $kmax > 0 && !$do_not_align ) { + next if ( $jmax <= 0 ); # but if this doesn't work, give up and use the minimum space + my $min_move = $self->[_rOpts_minimum_space_to_comment_] - 1; if ( $move > $avail ) { - $move = $rOpts_minimum_space_to_comment - 1; + $move = $min_move; } # but we want some minimum space to the comment - my $min_move = $rOpts_minimum_space_to_comment - 1; if ( $move >= 0 - && $last_side_comment_length > 0 - && ( $first_side_comment_line == 0 ) - && $group_level == $last_level_written ) + && $j_sc_beg == 0 + && $continuing_sc_flow ) { $min_move = 0; } + # remove constraints on hanging side comments + if ($is_hanging_side_comment) { $min_move = 0 } + if ( $move < $min_move ) { $move = $min_move; } - # previously, an upper bound was placed on $move here, - # (maximum_space_to_comment), but it was not helpful - # don't exceed the available space if ( $move > $avail ) { $move = $avail } - # we can only increase space, never decrease - if ( $move > 0 ) { - $line->increase_field_width( $maximum_field_index - 1, $move ); - } + # We can only increase space, never decrease. + if ( $move < 0 ) { $move = 0 } - # remember this column for the next group - $last_comment_column = $line->get_column( $kmax - 2 ); - } - else { + # Discover the largest column on the preliminary pass + if ( $PASS < $MAX_PASS ) { + my $col = $line->get_column( $jmax - 1 ) + $move; - # try to at least line up the existing side comment location - if ( $kmax > 0 && $move > 0 && $move < $avail ) { - $line->increase_field_width( $maximum_field_index - 1, $move ); - $do_not_align = 0; + # but ignore columns too large for the starting line + if ( $col > $max_comment_column && $col < $column_limit ) { + $max_comment_column = $col; + } } - # reset side comment column if we can't align + # Make the changes on the final pass else { - forget_side_comment(); + $line->increase_field_width( $jmax - 1, $move ); + + # remember this column for the next group + $last_side_comment_column = $line->get_column( $jmax - 1 ); } + } ## end loop over groups + } ## end loop over passes + + # Find the last side comment + my $j_sc_last; + my $ng_last = $todo[-1]; + my ( $jbeg, $jend ) = @{ $rgroups->[$ng_last] }; + for ( my $jj = $jend ; $jj >= $jbeg ; $jj-- ) { + my $line = $rlines->[$jj]; + my $jmax = $line->get_jmax(); + if ( $line->get_rfield_lengths()->[$jmax] ) { + $j_sc_last = $jj; + last; } } - return $do_not_align; + + # Save final side comment info for possible use by the next batch + if ( defined($j_sc_last) ) { + my $line_number = + $self->[_file_writer_object_]->get_output_line_number() + $j_sc_last; + $self->[_last_side_comment_column_] = $last_side_comment_column; + $self->[_last_side_comment_line_number_] = $line_number; + $self->[_last_side_comment_level_] = $group_level; + } + return; } +############################### +# CODE SECTION 6: Output Step A +############################### + sub valign_output_step_A { ############################################################### @@ -2843,21 +4516,31 @@ sub valign_output_step_A { # been found. Then it is shipped to the next step. ############################################################### - my ( $line, $min_ci_gap, $do_not_align, $group_leader_length, - $extra_leading_spaces ) - = @_; + my ( $self, $rinput_hash ) = @_; + + my $line = $rinput_hash->{line}; + my $min_ci_gap = $rinput_hash->{min_ci_gap}; + my $do_not_align = $rinput_hash->{do_not_align}; + my $group_leader_length = $rinput_hash->{group_leader_length}; + my $extra_leading_spaces = $rinput_hash->{extra_leading_spaces}; + my $level = $rinput_hash->{level}; + my $rfields = $line->get_rfields(); + my $rfield_lengths = $line->get_rfield_lengths(); my $leading_space_count = $line->get_leading_space_count(); my $outdent_long_lines = $line->get_outdent_long_lines(); my $maximum_field_index = $line->get_jmax(); my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags(); + my $Kend = $line->get_Kend(); + my $level_end = $line->get_level_end(); # add any extra spaces if ( $leading_space_count > $group_leader_length ) { $leading_space_count += $min_ci_gap; } - my $str = $rfields->[0]; + my $str = $rfields->[0]; + my $str_len = $rfield_lengths->[0]; # loop to concatenate all fields of this line and needed padding my $total_pad_count = 0; @@ -2868,483 +4551,695 @@ sub valign_output_step_A { if ( ( $j == $maximum_field_index ) && ( !defined( $rfields->[$j] ) - || ( length( $rfields->[$j] ) == 0 ) ) + || ( $rfield_lengths->[$j] == 0 ) ) ); # compute spaces of padding before this field my $col = $line->get_column( $j - 1 ); - my $pad = $col - ( length($str) + $leading_space_count ); + my $pad = $col - ( $str_len + $leading_space_count ); if ($do_not_align) { $pad = ( $j < $maximum_field_index ) ? 0 - : $rOpts_minimum_space_to_comment - 1; + : $self->[_rOpts_minimum_space_to_comment_] - 1; } # if the -fpsc flag is set, move the side comment to the selected # column if and only if it is possible, ignoring constraints on # line length and minimum space to comment - if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index ) + if ( $self->[_rOpts_fixed_position_side_comment_] + && $j == $maximum_field_index ) { - my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1; + my $newpad = + $pad + $self->[_rOpts_fixed_position_side_comment_] - $col - 1; if ( $newpad >= 0 ) { $pad = $newpad; } } # accumulate the padding if ( $pad > 0 ) { $total_pad_count += $pad; } - # add this field - if ( !defined $rfields->[$j] ) { - write_diagnostics("UNDEFined field at j=$j\n"); - } - # only add padding when we have a finite field; # this avoids extra terminal spaces if we have empty fields - if ( length( $rfields->[$j] ) > 0 ) { + if ( $rfield_lengths->[$j] > 0 ) { $str .= ' ' x $total_pad_count; + $str_len += $total_pad_count; $total_pad_count = 0; $str .= $rfields->[$j]; + $str_len += $rfield_lengths->[$j]; } else { $total_pad_count = 0; } - - # update side comment history buffer - if ( $j == $maximum_field_index ) { - my $lineno = $file_writer_object->get_output_line_number(); - shift @side_comment_history; - push @side_comment_history, [ $lineno, $col ]; - } } - my $side_comment_length = ( length( $rfields->[$maximum_field_index] ) ); + my $side_comment_length = $rfield_lengths->[$maximum_field_index]; # ship this line off - valign_output_step_B( $leading_space_count + $extra_leading_spaces, - $str, $side_comment_length, $outdent_long_lines, - $rvertical_tightness_flags, $group_level ); + $self->valign_output_step_B( + { + leading_space_count => $leading_space_count + $extra_leading_spaces, + line => $str, + line_length => $str_len, + side_comment_length => $side_comment_length, + outdent_long_lines => $outdent_long_lines, + rvertical_tightness_flags => $rvertical_tightness_flags, + level => $level, + level_end => $level_end, + Kend => $Kend, + } + ); return; } -sub get_extra_leading_spaces { +sub combine_fields { - #---------------------------------------------------------- - # Define any extra indentation space (for the -lp option). - # Here is why: - # If a list has side comments, sub scan_list must dump the - # list before it sees everything. When this happens, it sets - # the indentation to the standard scheme, but notes how - # many spaces it would have liked to use. We may be able - # to recover that space here in the event that all of the - # lines of a list are back together again. - #---------------------------------------------------------- + # We have a group of two lines for which we do not want to align tokens + # between index $imax_align and the side comment. So we will delete fields + # between $imax_align and the side comment. Alignments have already + # been set so we have to adjust them. - my $extra_leading_spaces = 0; - if ($extra_indent_ok) { - my $object = $group_lines[0]->get_indentation(); - if ( ref($object) ) { - my $extra_indentation_spaces_wanted = - get_recoverable_spaces($object); + my ( $line_0, $line_1, $imax_align ) = @_; - # all indentation objects must be the same - for my $i ( 1 .. @group_lines - 1 ) { - if ( $object != $group_lines[$i]->get_indentation() ) { - $extra_indentation_spaces_wanted = 0; - last; - } - } + if ( !defined($imax_align) ) { $imax_align = -1 } - if ($extra_indentation_spaces_wanted) { + # First delete the unwanted tokens + my $jmax_old = $line_0->get_jmax(); + my @old_alignments = $line_0->get_alignments(); + my @idel = ( $imax_align + 1 .. $jmax_old - 2 ); - # the maximum space without exceeding the line length: - my $avail = $group_lines[0]->get_available_space_on_right(); - $extra_leading_spaces = - ( $avail > $extra_indentation_spaces_wanted ) - ? $extra_indentation_spaces_wanted - : $avail; + return unless (@idel); - # update the indentation object because with -icp the terminal - # ');' will use the same adjustment. - $object->permanently_decrease_available_spaces( - -$extra_leading_spaces ); - } - } + foreach my $line ( $line_0, $line_1 ) { + delete_selected_tokens( $line, \@idel ); } - return $extra_leading_spaces; -} - -sub combine_fields { - - # combine all fields except for the comment field ( sidecmt.t ) - # Uses global variables: - # @group_lines - my $maximum_field_index = $group_lines[0]->get_jmax(); - foreach my $line (@group_lines) { - my $rfields = $line->get_rfields(); - foreach ( 1 .. $maximum_field_index - 1 ) { - $rfields->[0] .= $rfields->[$_]; - } - $rfields->[1] = $rfields->[$maximum_field_index]; - - $line->set_jmax(1); - $line->set_column( 0, 0 ); - $line->set_column( 1, 0 ); + # Now adjust the alignments. Note that the side comment alignment + # is always at jmax-1, and there is an ending alignment at jmax. + my @new_alignments; + if ( $imax_align >= 0 ) { + @new_alignments[ 0 .. $imax_align ] = + @old_alignments[ 0 .. $imax_align ]; } - $maximum_field_index = 1; - - foreach my $line (@group_lines) { - my $rfields = $line->get_rfields(); - for my $k ( 0 .. $maximum_field_index ) { - my $pad = length( $rfields->[$k] ) - $line->current_field_width($k); - if ( $k == 0 ) { - $pad += $line->get_leading_space_count(); - } - if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) } + my $jmax_new = $line_0->get_jmax(); - } - } + $new_alignments[ $jmax_new - 1 ] = $old_alignments[ $jmax_old - 1 ]; + $new_alignments[$jmax_new] = $old_alignments[$jmax_old]; + $line_0->set_alignments(@new_alignments); + $line_1->set_alignments(@new_alignments); return; } sub get_output_line_number { - # the output line number reported to a caller is the number of items - # written plus the number of items in the buffer - my $self = shift; - my $nlines = @group_lines; - return $nlines + $file_writer_object->get_output_line_number(); + # The output line number reported to a caller = + # the number of items still in the buffer + + # the number of items written. + return $_[0]->group_line_count() + + $_[0]->[_file_writer_object_]->get_output_line_number(); } -sub valign_output_step_B { - - ############################################################### - # This is Step B in writing vertically aligned lines. - # Vertical tightness is applied according to preset flags. - # In particular this routine handles stacking of opening - # and closing tokens. - ############################################################### +############################### +# CODE SECTION 7: Output Step B +############################### + +{ ## closure for sub valign_output_step_B + + # These are values for a cache used by valign_output_step_B. + my $cached_line_text; + my $cached_line_text_length; + my $cached_line_type; + my $cached_line_flag; + my $cached_seqno; + my $cached_line_valid; + my $cached_line_leading_space_count; + my $cached_seqno_string; + my $cached_line_Kend; + my $seqno_string; + my $last_nonblank_seqno_string; + + sub get_seqno_string { + return $seqno_string; + } - my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines, - $rvertical_tightness_flags, $level ) - = @_; + sub get_last_nonblank_seqno_string { + return $last_nonblank_seqno_string; + } - # handle outdenting of long lines: - if ($outdent_long_lines) { - my $excess = - length($str) - - $side_comment_length + - $leading_space_count - - maximum_line_length_for_level($level); - if ( $excess > 0 ) { - $leading_space_count = 0; - $last_outdented_line_at = - $file_writer_object->get_output_line_number(); + sub set_last_nonblank_seqno_string { + my ($val) = @_; + $last_nonblank_seqno_string = $val; + return; + } - unless ($outdented_line_count) { - $first_outdented_line_at = $last_outdented_line_at; - } - $outdented_line_count++; - } + sub get_cached_line_flag { + return $cached_line_flag; } - # Make preliminary leading whitespace. It could get changed - # later by entabbing, so we have to keep track of any changes - # to the leading_space_count from here on. - my $leading_string = - $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : ""; + sub get_cached_line_type { + return $cached_line_type; + } - # Unpack any recombination data; it was packed by - # sub send_lines_to_vertical_aligner. Contents: - # - # [0] type: 1=opening non-block 2=closing non-block - # 3=opening block brace 4=closing block brace - # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok - # if closing: spaces of padding to use - # [2] sequence number of container - # [3] valid flag: do not append if this flag is false - # - my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg, - $seqno_end ); - if ($rvertical_tightness_flags) { - ( - $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg, - $seqno_end - ) = @{$rvertical_tightness_flags}; + sub set_cached_line_valid { + my ($val) = @_; + $cached_line_valid = $val; + return; } - $seqno_string = $seqno_end; + sub get_cached_seqno { + return $cached_seqno; + } - # handle any cached line .. - # either append this line to it or write it out - if ( length($cached_line_text) ) { + sub initialize_step_B_cache { + + # valign_output_step_B cache: + $cached_line_text = ""; + $cached_line_text_length = 0; + $cached_line_type = 0; + $cached_line_flag = 0; + $cached_seqno = 0; + $cached_line_valid = 0; + $cached_line_leading_space_count = 0; + $cached_seqno_string = ""; + $cached_line_Kend = undef; + + # These vars hold a string of sequence numbers joined together used by + # the cache + $seqno_string = ""; + $last_nonblank_seqno_string = ""; + return; + } - # Dump an invalid cached line - if ( !$cached_line_valid ) { - valign_output_step_C( $cached_line_text, + sub _flush_cache { + my ($self) = @_; + if ($cached_line_type) { + $seqno_string = $cached_seqno_string; + $self->valign_output_step_C( + $cached_line_text, $cached_line_leading_space_count, - $last_level_written ); + $self->[_last_level_written_], + $cached_line_Kend, + ); + $cached_line_type = 0; + $cached_line_text = ""; + $cached_line_text_length = 0; + $cached_seqno_string = ""; + $cached_line_Kend = undef; } + return; + } - # Handle cached line ending in OPENING tokens - elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) { - - my $gap = $leading_space_count - length($cached_line_text); - - # handle option of just one tight opening per line: - if ( $cached_line_flag == 1 ) { - if ( defined($open_or_close) && $open_or_close == 1 ) { - $gap = -1; + sub valign_output_step_B { + + ############################################################### + # This is Step B in writing vertically aligned lines. + # Vertical tightness is applied according to preset flags. + # In particular this routine handles stacking of opening + # and closing tokens. + ############################################################### + + my ( $self, $rinput ) = @_; + + my $leading_space_count = $rinput->{leading_space_count}; + my $str = $rinput->{line}; + my $str_length = $rinput->{line_length}; + my $side_comment_length = $rinput->{side_comment_length}; + my $outdent_long_lines = $rinput->{outdent_long_lines}; + my $rvertical_tightness_flags = $rinput->{rvertical_tightness_flags}; + my $level = $rinput->{level}; + my $level_end = $rinput->{level_end}; + my $Kend = $rinput->{Kend}; + + my $last_level_written = $self->[_last_level_written_]; + + # Useful -gcs test cases for wide characters are + # perl527/(method.t.2, reg_mesg.t, mime-header.t) + + # handle outdenting of long lines: + my $is_outdented_line; + if ($outdent_long_lines) { + my $excess = + $str_length - + $side_comment_length + + $leading_space_count - + $self->maximum_line_length_for_level($level); + if ( $excess > 0 ) { + $leading_space_count = 0; + my $file_writer_object = $self->[_file_writer_object_]; + my $last_outdented_line_at = + $file_writer_object->get_output_line_number(); + $self->[_last_outdented_line_at_] = $last_outdented_line_at; + + my $outdented_line_count = $self->[_outdented_line_count_]; + unless ($outdented_line_count) { + $self->[_first_outdented_line_at_] = + $last_outdented_line_at; } + $outdented_line_count++; + $self->[_outdented_line_count_] = $outdented_line_count; + $is_outdented_line = 1; } + } + + # Make preliminary leading whitespace. It could get changed + # later by entabbing, so we have to keep track of any changes + # to the leading_space_count from here on. + my $leading_string = + $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : ""; + my $leading_string_length = length($leading_string); + + # Unpack any recombination data; it was packed by + # sub send_lines_to_vertical_aligner. Contents: + # + # [0] type: 1=opening non-block 2=closing non-block + # 3=opening block brace 4=closing block brace + # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok + # if closing: spaces of padding to use + # [2] sequence number of container + # [3] valid flag: do not append if this flag is false + # + my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg, + $seqno_end ); + if ($rvertical_tightness_flags) { + ( + $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg, + $seqno_end + ) = @{$rvertical_tightness_flags}; + } - if ( $gap >= 0 && defined($seqno_beg) ) { - $leading_string = $cached_line_text . ' ' x $gap; - $leading_space_count = $cached_line_leading_space_count; - $seqno_string = $cached_seqno_string . ':' . $seqno_beg; - $level = $last_level_written; + $seqno_string = $seqno_end; + + # handle any cached line .. + # either append this line to it or write it out + # Note: the function length() is used in this next test out of caution. + # All testing has shown that the variable $cached_line_text_length is + # correct, but its calculation is complex and a loss of cached text + # would be a disaster. + if ( length($cached_line_text) ) { + + # Dump an invalid cached line + if ( !$cached_line_valid ) { + $self->valign_output_step_C( + $cached_line_text, $cached_line_leading_space_count, + $last_level_written, $cached_line_Kend + ); } - else { - valign_output_step_C( $cached_line_text, - $cached_line_leading_space_count, - $last_level_written ); + + # Handle cached line ending in OPENING tokens + elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) { + + my $gap = $leading_space_count - $cached_line_text_length; + + # handle option of just one tight opening per line: + if ( $cached_line_flag == 1 ) { + if ( defined($open_or_close) && $open_or_close == 1 ) { + $gap = -1; + } + } + + # Do not join the lines if this might produce a one-line + # container which exceeds the maximum line length. This is + # necessary prevent blinking, particularly with the combination + # -xci -pvt=2. In that case a one-line block alternately forms + # and breaks, causing -xci to alternately turn on and off (case + # b765). + # Patched to fix cases b656 b862 b971 b972: always do the check + # if -vmll is set. The reason is that the -vmll option can + # cause changes in the maximum line length, leading to blinkers + # if not checked. + if ( + $gap >= 0 + && ( $self->[_rOpts_variable_maximum_line_length_] + || ( defined($level_end) && $level > $level_end ) ) + ) + { + my $test_line_length = + $cached_line_text_length + $gap + $str_length; + my $maximum_line_length = + $self->maximum_line_length_for_level($last_level_written); + + # Add a small tolerance in the length test (fixes case b862) + if ( $test_line_length > $maximum_line_length - 2 ) { + $gap = -1; + } + } + + if ( $gap >= 0 && defined($seqno_beg) ) { + $leading_string = $cached_line_text . ' ' x $gap; + $leading_string_length = $cached_line_text_length + $gap; + $leading_space_count = $cached_line_leading_space_count; + $seqno_string = $cached_seqno_string . ':' . $seqno_beg; + $level = $last_level_written; + } + else { + $self->valign_output_step_C( + $cached_line_text, $cached_line_leading_space_count, + $last_level_written, $cached_line_Kend + ); + } } - } - # Handle cached line ending in CLOSING tokens - else { - my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str; - if ( + # Handle cached line ending in CLOSING tokens + else { + my $test_line = + $cached_line_text . ' ' x $cached_line_flag . $str; + my $test_line_length = + $cached_line_text_length + $cached_line_flag + $str_length; + if ( - # The new line must start with container - $seqno_beg + # The new line must start with container + $seqno_beg - # The container combination must be okay.. - && ( + # The container combination must be okay.. + && ( - # okay to combine like types - ( $open_or_close == $cached_line_type ) + # okay to combine like types + ( $open_or_close == $cached_line_type ) - # closing block brace may append to non-block - || ( $cached_line_type == 2 && $open_or_close == 4 ) + # closing block brace may append to non-block + || ( $cached_line_type == 2 && $open_or_close == 4 ) - # something like ');' - || ( !$open_or_close && $cached_line_type == 2 ) + # something like ');' + || ( !$open_or_close && $cached_line_type == 2 ) - ) + ) - # The combined line must fit - && ( - length($test_line) <= - maximum_line_length_for_level($last_level_written) ) - ) - { + # The combined line must fit + && ( + $test_line_length <= + $self->maximum_line_length_for_level( + $last_level_written) + ) + ) + { - $seqno_string = $cached_seqno_string . ':' . $seqno_beg; - - # Patch to outdent closing tokens ending # in ');' - # If we are joining a line like ');' to a previous stacked - # set of closing tokens, then decide if we may outdent the - # combined stack to the indentation of the ');'. Since we - # should not normally outdent any of the other tokens more than - # the indentation of the lines that contained them, we will - # only do this if all of the corresponding opening - # tokens were on the same line. This can happen with - # -sot and -sct. For example, it is ok here: - # __PACKAGE__->load_components( qw( - # PK::Auto - # Core - # )); - # - # But, for example, we do not outdent in this example because - # that would put the closing sub brace out farther than the - # opening sub brace: - # - # perltidy -sot -sct - # $c->Tk::bind( - # '' => sub { - # my ($c) = @_; - # my $e = $c->XEvent; - # itemsUnderArea $c; - # } ); - # - if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) { - - # The way to tell this is if the stacked sequence numbers - # of this output line are the reverse of the stacked - # sequence numbers of the previous non-blank line of - # sequence numbers. So we can join if the previous - # nonblank string of tokens is the mirror image. For - # example if stack )}] is 13:8:6 then we are looking for a - # leading stack like [{( which is 6:8:13 We only need to - # check the two ends, because the intermediate tokens must - # fall in order. Note on speed: having to split on colons - # and eliminate multiple colons might appear to be slow, - # but it's not an issue because we almost never come - # through here. In a typical file we don't. - $seqno_string =~ s/^:+//; - $last_nonblank_seqno_string =~ s/^:+//; - $seqno_string =~ s/:+/:/g; - $last_nonblank_seqno_string =~ s/:+/:/g; - - # how many spaces can we outdent? - my $diff = - $cached_line_leading_space_count - $leading_space_count; - if ( $diff > 0 - && length($seqno_string) - && length($last_nonblank_seqno_string) == - length($seqno_string) ) + $seqno_string = $cached_seqno_string . ':' . $seqno_beg; + + # Patch to outdent closing tokens ending # in ');' If we + # are joining a line like ');' to a previous stacked set of + # closing tokens, then decide if we may outdent the + # combined stack to the indentation of the ');'. Since we + # should not normally outdent any of the other tokens more + # than the indentation of the lines that contained them, we + # will only do this if all of the corresponding opening + # tokens were on the same line. This can happen with -sot + # and -sct. + + # For example, it is ok here: + # __PACKAGE__->load_components( qw( + # PK::Auto + # Core + # )); + # + # But, for example, we do not outdent in this example + # because that would put the closing sub brace out farther + # than the opening sub brace: + # + # perltidy -sot -sct + # $c->Tk::bind( + # '' => sub { + # my ($c) = @_; + # my $e = $c->XEvent; + # itemsUnderArea $c; + # } ); + # + if ( $str =~ /^\);/ + && $cached_line_text =~ /^[\)\}\]\s]*$/ ) { - my @seqno_last = - ( split /:/, $last_nonblank_seqno_string ); - my @seqno_now = ( split /:/, $seqno_string ); - if ( @seqno_now - && @seqno_last - && $seqno_now[-1] == $seqno_last[0] - && $seqno_now[0] == $seqno_last[-1] ) - { - # OK to outdent .. - # for absolute safety, be sure we only remove - # whitespace - my $ws = substr( $test_line, 0, $diff ); - if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) { - - $test_line = substr( $test_line, $diff ); - $cached_line_leading_space_count -= $diff; - $last_level_written = - level_change( - $cached_line_leading_space_count, - $diff, $last_level_written ); - reduce_valign_buffer_indentation($diff); + # The way to tell this is if the stacked sequence + # numbers of this output line are the reverse of the + # stacked sequence numbers of the previous non-blank + # line of sequence numbers. So we can join if the + # previous nonblank string of tokens is the mirror + # image. For example if stack )}] is 13:8:6 then we + # are looking for a leading stack like [{( which + # is 6:8:13. We only need to check the two ends, + # because the intermediate tokens must fall in order. + # Note on speed: having to split on colons and + # eliminate multiple colons might appear to be slow, + # but it's not an issue because we almost never come + # through here. In a typical file we don't. + + $seqno_string =~ s/^:+//; + $last_nonblank_seqno_string =~ s/^:+//; + $seqno_string =~ s/:+/:/g; + $last_nonblank_seqno_string =~ s/:+/:/g; + + # how many spaces can we outdent? + my $diff = + $cached_line_leading_space_count - + $leading_space_count; + if ( $diff > 0 + && length($seqno_string) + && length($last_nonblank_seqno_string) == + length($seqno_string) ) + { + my @seqno_last = + ( split /:/, $last_nonblank_seqno_string ); + my @seqno_now = ( split /:/, $seqno_string ); + if ( @seqno_now + && @seqno_last + && $seqno_now[-1] == $seqno_last[0] + && $seqno_now[0] == $seqno_last[-1] ) + { + + # OK to outdent .. + # for absolute safety, be sure we only remove + # whitespace + my $ws = substr( $test_line, 0, $diff ); + if ( ( length($ws) == $diff ) + && $ws =~ /^\s+$/ ) + { + + $test_line = substr( $test_line, $diff ); + $cached_line_leading_space_count -= $diff; + $last_level_written = + $self->level_change( + $cached_line_leading_space_count, + $diff, $last_level_written ); + $self->reduce_valign_buffer_indentation( + $diff); + } + + # shouldn't happen, but not critical: + ##else { + ## ERROR transferring indentation here + ##} } - - # shouldn't happen, but not critical: - ##else { - ## ERROR transferring indentation here - ##} } } - } - $str = $test_line; - $leading_string = ""; - $leading_space_count = $cached_line_leading_space_count; - $level = $last_level_written; + $str = $test_line; + $str_length = $test_line_length; + $leading_string = ""; + $leading_string_length = 0; + $leading_space_count = $cached_line_leading_space_count; + $level = $last_level_written; + } + else { + $self->valign_output_step_C( + $cached_line_text, $cached_line_leading_space_count, + $last_level_written, $cached_line_Kend + ); + } } - else { - valign_output_step_C( $cached_line_text, - $cached_line_leading_space_count, - $last_level_written ); + } + $cached_line_type = 0; + $cached_line_text = ""; + $cached_line_text_length = 0; + $cached_line_Kend = undef; + + # make the line to be written + my $line = $leading_string . $str; + my $line_length = $leading_string_length + $str_length; + + # Safety check: be sure that a line to be cached as a stacked block + # brace line ends in the appropriate opening or closing block brace. + # This should always be the case if the caller set flags correctly. + # Code '3' is for -sobb, code '4' is for -scbb. + if ($open_or_close) { + if ( $open_or_close == 3 && $line !~ /\{\s*$/ + || $open_or_close == 4 && $line !~ /\}\s*$/ ) + { + $open_or_close = 0; } } - } - $cached_line_type = 0; - $cached_line_text = ""; - # make the line to be written - my $line = $leading_string . $str; + # write or cache this line ... + # fix for case b999: do not cache an outdented line + if ( !$open_or_close || $side_comment_length > 0 || $is_outdented_line ) + { + $self->valign_output_step_C( $line, $leading_space_count, $level, + $Kend ); + } + else { + $cached_line_text = $line; + $cached_line_text_length = $line_length; + $cached_line_type = $open_or_close; + $cached_line_flag = $tightness_flag; + $cached_seqno = $seqno; + $cached_line_valid = $valid; + $cached_line_leading_space_count = $leading_space_count; + $cached_seqno_string = $seqno_string; + $cached_line_Kend = $Kend; + } - # write or cache this line - if ( !$open_or_close || $side_comment_length > 0 ) { - valign_output_step_C( $line, $leading_space_count, $level ); + $self->[_last_level_written_] = $level; + $self->[_last_side_comment_length_] = $side_comment_length; + return; } - else { - $cached_line_text = $line; - $cached_line_type = $open_or_close; - $cached_line_flag = $tightness_flag; - $cached_seqno = $seqno; - $cached_line_valid = $valid; - $cached_line_leading_space_count = $leading_space_count; - $cached_seqno_string = $seqno_string; - } - - $last_level_written = $level; - $last_side_comment_length = $side_comment_length; - $extra_indent_ok = 0; - return; } -sub valign_output_step_C { +############################### +# CODE SECTION 8: Output Step C +############################### - ############################################################### - # This is Step C in writing vertically aligned lines. - # Lines are either stored in a buffer or passed along to the next step. - # The reason for storing lines is that we may later want to reduce their - # indentation when -sot and -sct are both used. - ############################################################### - my @args = @_; +{ ## closure for sub valign_output_step_C - # Dump any saved lines if we see a line with an unbalanced opening or - # closing token. - dump_valign_buffer() if ( $seqno_string && $valign_buffer_filling ); + # Vertical alignment buffer used by valign_output_step_C + my $valign_buffer_filling; + my @valign_buffer; - # Either store or write this line - if ($valign_buffer_filling) { - push @valign_buffer, [@args]; + sub initialize_valign_buffer { + @valign_buffer = (); + $valign_buffer_filling = ""; + return; } - else { - valign_output_step_D(@args); + + sub dump_valign_buffer { + my ($self) = @_; + if (@valign_buffer) { + foreach (@valign_buffer) { + $self->valign_output_step_D( @{$_} ); + } + @valign_buffer = (); + } + $valign_buffer_filling = ""; + return; + } + + sub reduce_valign_buffer_indentation { + + my ( $self, $diff ) = @_; + if ( $valign_buffer_filling && $diff ) { + my $max_valign_buffer = @valign_buffer; + foreach my $i ( 0 .. $max_valign_buffer - 1 ) { + my ( $line, $leading_space_count, $level, $Kend ) = + @{ $valign_buffer[$i] }; + my $ws = substr( $line, 0, $diff ); + if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) { + $line = substr( $line, $diff ); + } + if ( $leading_space_count >= $diff ) { + $leading_space_count -= $diff; + $level = + $self->level_change( $leading_space_count, $diff, + $level ); + } + $valign_buffer[$i] = + [ $line, $leading_space_count, $level, $Kend ]; + } + } + return; } - # For lines starting or ending with opening or closing tokens.. - if ($seqno_string) { - $last_nonblank_seqno_string = $seqno_string; + sub valign_output_step_C { - # Start storing lines when we see a line with multiple stacked opening - # tokens. - # patch for RT #94354, requested by Colin Williams - if ( $seqno_string =~ /^\d+(\:+\d+)+$/ && $args[0] !~ /^[\}\)\]\:\?]/ ) - { + ############################################################### + # This is Step C in writing vertically aligned lines. + # Lines are either stored in a buffer or passed along to the next step. + # The reason for storing lines is that we may later want to reduce their + # indentation when -sot and -sct are both used. + ############################################################### + my ( $self, @args ) = @_; + + my $seqno_string = get_seqno_string(); + my $last_nonblank_seqno_string = get_last_nonblank_seqno_string(); + + # Dump any saved lines if we see a line with an unbalanced opening or + # closing token. + $self->dump_valign_buffer() + if ( $seqno_string && $valign_buffer_filling ); + + # Either store or write this line + if ($valign_buffer_filling) { + push @valign_buffer, [@args]; + } + else { + $self->valign_output_step_D(@args); + } + + # For lines starting or ending with opening or closing tokens.. + if ($seqno_string) { + $last_nonblank_seqno_string = $seqno_string; + set_last_nonblank_seqno_string($seqno_string); + + # Start storing lines when we see a line with multiple stacked + # opening tokens. + # patch for RT #94354, requested by Colin Williams + if ( $seqno_string =~ /^\d+(\:+\d+)+$/ + && $args[0] !~ /^[\}\)\]\:\?]/ ) + { - # This test is efficient but a little subtle: The first test says - # that we have multiple sequence numbers and hence multiple opening - # or closing tokens in this line. The second part of the test - # rejects stacked closing and ternary tokens. So if we get here - # then we should have stacked unbalanced opening tokens. + # This test is efficient but a little subtle: The first test + # says that we have multiple sequence numbers and hence + # multiple opening or closing tokens in this line. The second + # part of the test rejects stacked closing and ternary tokens. + # So if we get here then we should have stacked unbalanced + # opening tokens. - # Here is a complex example: + # Here is a complex example: - # Foo($Bar[0], { # (side comment) - # baz => 1, - # }); + # Foo($Bar[0], { # (side comment) + # baz => 1, + # }); - # The first line has sequence 6::4. It does not begin with - # a closing token or ternary, so it passes the test and must be - # stacked opening tokens. + # The first line has sequence 6::4. It does not begin with + # a closing token or ternary, so it passes the test and must be + # stacked opening tokens. - # The last line has sequence 4:6 but is a stack of closing tokens, - # so it gets rejected. + # The last line has sequence 4:6 but is a stack of closing + # tokens, so it gets rejected. - # Note that the sequence number of an opening token for a qw quote - # is a negative number and will be rejected. - # For example, for the following line: - # skip_symbols([qw( - # $seqno_string='10:5:-1'. It would be okay to accept it but - # I decided not to do this after testing. + # Note that the sequence number of an opening token for a qw + # quote is a negative number and will be rejected. For + # example, for the following line: skip_symbols([qw( + # $seqno_string='10:5:-1'. It would be okay to accept it but I + # decided not to do this after testing. - $valign_buffer_filling = $seqno_string; + $valign_buffer_filling = $seqno_string; + } } + return; } - return; } +############################### +# CODE SECTION 9: Output Step D +############################### + sub valign_output_step_D { ############################################################### # This is Step D in writing vertically aligned lines. + # It is the end of the vertical alignment pipeline. # Write one vertically aligned line of code to the output object. ############################################################### - my ( $line, $leading_space_count, $level ) = @_; + my ( $self, $line, $leading_space_count, $level, $Kend ) = @_; # The line is currently correct if there is no tabbing (recommended!) # We may have to lop off some leading spaces and replace with tabs. if ( $leading_space_count > 0 ) { + my $rOpts_indent_columns = $self->[_rOpts_indent_columns_]; + my $rOpts_tabs = $self->[_rOpts_tabs_]; + my $rOpts_entab_leading_whitespace = + $self->[_rOpts_entab_leading_whitespace_]; + # Nothing to do if no tabs if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace ) || $rOpts_indent_columns <= 0 ) @@ -3356,9 +5251,9 @@ sub valign_output_step_D { # Handle entab option elsif ($rOpts_entab_leading_whitespace) { - # Patch 12-nov-2018 based on report from Glenn. Extra padding was - # not correctly entabbed, nor were side comments: - # Increase leading space count for a padded line to get correct tabbing + # Patch 12-nov-2018 based on report from Glenn. Extra padding was + # not correctly entabbed, nor were side comments: Increase leading + # space count for a padded line to get correct tabbing if ( $line =~ /^(\s+)(.*)$/ ) { my $spaces = length($1); if ( $spaces > $leading_space_count ) { @@ -3378,8 +5273,8 @@ sub valign_output_step_D { # shouldn't happen - program error counting whitespace # - skip entabbing - VALIGN_DEBUG_FLAG_TABS - && warning( + DEBUG_TABS + && $self->warning( "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n" ); } @@ -3396,9 +5291,9 @@ sub valign_output_step_D { # But it could be an outdented comment if ( $line !~ /^\s*#/ ) { - VALIGN_DEBUG_FLAG_TABS - && warning( -"Error entabbing in valign_output_step_D: for level=$group_level count=$leading_space_count\n" + DEBUG_TABS + && $self->warning( +"Error entabbing in valign_output_step_D: for level=$level count=$leading_space_count\n" ); } $leading_string = ( ' ' x $leading_space_count ); @@ -3413,25 +5308,32 @@ sub valign_output_step_D { # shouldn't happen - program error counting whitespace # we'll skip entabbing - VALIGN_DEBUG_FLAG_TABS - && warning( + DEBUG_TABS + && $self->warning( "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n" ); } } } - $file_writer_object->write_code_line( $line . "\n" ); + my $file_writer_object = $self->[_file_writer_object_]; + $file_writer_object->write_code_line( $line . "\n", $Kend ); + return; } -{ # begin get_leading_string +{ ## closure for sub get_leading_string my @leading_string_cache; + sub initialize_leading_string_cache { + @leading_string_cache = (); + return; + } + sub get_leading_string { # define the leading whitespace string for this line.. - my $leading_whitespace_count = shift; + my ( $self, $leading_whitespace_count, $group_level ) = @_; # Handle case of zero whitespace, which includes multi-line quotes # (which may have a finite level; this prevents tab problems) @@ -3448,6 +5350,11 @@ sub valign_output_step_D { my $leading_string; # Handle simple case of no tabs + my $rOpts_indent_columns = $self->[_rOpts_indent_columns_]; + my $rOpts_tabs = $self->[_rOpts_tabs_]; + my $rOpts_entab_leading_whitespace = + $self->[_rOpts_entab_leading_whitespace_]; + if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace ) || $rOpts_indent_columns <= 0 ) { @@ -3471,8 +5378,8 @@ sub valign_output_step_D { # shouldn't happen: if ( $space_count < 0 ) { - VALIGN_DEBUG_FLAG_TABS - && warning( + DEBUG_TABS + && $self->warning( "Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n" ); @@ -3488,22 +5395,30 @@ sub valign_output_step_D { } } # end get_leading_string +########################## +# CODE SECTION 10: Summary +########################## + sub report_anything_unusual { my $self = shift; + + my $outdented_line_count = $self->[_outdented_line_count_]; if ( $outdented_line_count > 0 ) { - write_logfile_entry( + $self->write_logfile_entry( "$outdented_line_count long lines were outdented:\n"); - write_logfile_entry( + my $first_outdented_line_at = $self->[_first_outdented_line_at_]; + $self->write_logfile_entry( " First at output line $first_outdented_line_at\n"); if ( $outdented_line_count > 1 ) { - write_logfile_entry( + my $last_outdented_line_at = $self->[_last_outdented_line_at_]; + $self->write_logfile_entry( " Last at output line $last_outdented_line_at\n"); } - write_logfile_entry( + $self->write_logfile_entry( " use -noll to prevent outdenting, -l=n to increase line length\n" ); - write_logfile_entry("\n"); + $self->write_logfile_entry("\n"); } return; }