lib/Perl/Tidy/VerticalAligner.pm

   1 package Perl::Tidy::VerticalAligner;
   2 use strict;
   3 use warnings;
   4 use Carp;
   5 use English qw( -no_match_vars );
   6 our $VERSION = '20220613';
   7 use Perl::Tidy::VerticalAligner::Alignment;
   8 use Perl::Tidy::VerticalAligner::Line;
   9
  10 use constant DEVEL_MODE   => 0;
  11 use constant EMPTY_STRING => q{};
  12 use constant SPACE        => q{ };
  13
  14 # The Perl::Tidy::VerticalAligner package collects output lines and
  15 # attempts to line up certain common tokens, such as => and #, which are
  16 # identified by the calling routine.
  17 #
  18 # Usage:
  19 #   - Initiate an object with a call to new().
  20 #   - Write lines one-by-one with calls to valign_input().
  21 #   - Make a final call to flush() to empty the pipeline.
  22 #
  23 # The sub valign_input collects lines into groups.  When a group reaches
  24 # the maximum possible size it is processed for alignment and output.
  25 # The maximum group size is reached whenever there is a change in indentation
  26 # level, a blank line, a block comment, or an external flush call.  The calling
  27 # routine may also force a break in alignment at any time.
  28 #
  29 # If the calling routine needs to interrupt the output and send other text to
  30 # the output, it must first call flush() to empty the output pipeline.  This
  31 # might occur for example if a block of pod text needs to be sent to the output
  32 # between blocks of code.
  33
  34 # It is essential that a final call to flush() be made. Otherwise some
  35 # final lines of text will be lost.
  36
  37 # Index...
  38 # CODE SECTION 1: Preliminary code, global definitions and sub new
  39 #                 sub new
  40 # CODE SECTION 2: Some Basic Utilities
  41 # CODE SECTION 3: Code to accept input and form groups
  42 #                 sub valign_input
  43 # CODE SECTION 4: Code to process comment lines
  44 #                 sub _flush_comment_lines
  45 # CODE SECTION 5: Code to process groups of code lines
  46 #                 sub _flush_group_lines
  47 # CODE SECTION 6: Output Step A
  48 #                 sub valign_output_step_A
  49 # CODE SECTION 7: Output Step B
  50 #                 sub valign_output_step_B
  51 # CODE SECTION 8: Output Step C
  52 #                 sub valign_output_step_C
  53 # CODE SECTION 9: Output Step D
  54 #                 sub valign_output_step_D
  55 # CODE SECTION 10: Summary
  56 #                 sub report_anything_unusual
  57
  58 ##################################################################
  59 # CODE SECTION 1: Preliminary code, global definitions and sub new
  60 ##################################################################
  61
  62 sub AUTOLOAD {
  63
  64     # Catch any undefined sub calls so that we are sure to get
  65     # some diagnostic information.  This sub should never be called
  66     # except for a programming error.
  67     our $AUTOLOAD;
  68     return if ( $AUTOLOAD =~ /\bDESTROY$/ );
  69     my ( $pkg, $fname, $lno ) = caller();
  70     my $my_package = __PACKAGE__;
  71     print STDERR <<EOM;
  72 ======================================================================
  73 Error detected in package '$my_package', version $VERSION
  74 Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
  75 Called from package: '$pkg'
  76 Called from File '$fname'  at line '$lno'
  77 This error is probably due to a recent programming change
  78 ======================================================================
  79 EOM
  80     exit 1;
  81 }
  82
  83 sub DESTROY {
  84
  85     # required to avoid call to AUTOLOAD in some versions of perl
  86 }
  87
  88 sub Die {
  89     my ($msg) = @_;
  90     Perl::Tidy::Die($msg);
  91     croak "unexpected return from Perl::Tidy::Die";
  92 }
  93
  94 sub Fault {
  95     my ($msg) = @_;
  96
  97     # This routine is called for errors that really should not occur
  98     # except if there has been a bug introduced by a recent program change.
  99     # Please add comments at calls to Fault to explain why the call
 100     # should not occur, and where to look to fix it.
 101     my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
 102     my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
 103     my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
 104     my $input_stream_name = get_input_stream_name();
 105
 106     Die(<<EOM);
 107 ==============================================================================
 108 While operating on input stream with name: '$input_stream_name'
 109 A fault was detected at line $line0 of sub '$subroutine1'
 110 in file '$filename1'
 111 which was called from line $line1 of sub '$subroutine2'
 112 Message: '$msg'
 113 This is probably an error introduced by a recent programming change.
 114 Perl::Tidy::VerticalAligner.pm reports VERSION='$VERSION'.
 115 ==============================================================================
 116 EOM
 117
 118     # We shouldn't get here, but this return is to keep Perl-Critic from
 119     # complaining.
 120     return;
 121 }
 122
 123 BEGIN {
 124
 125     # Define the fixed indexes for variables in $self, which is an array
 126     # reference.  Note the convention of leading and trailing underscores to
 127     # keep them unique.
 128     # Do not combine with other BEGIN blocks (c101).
 129     my $i = 0;
 130     use constant {
 131         _file_writer_object_ => $i++,
 132         _logger_object_      => $i++,
 133         _diagnostics_object_ => $i++,
 134         _length_function_    => $i++,
 135
 136         _rOpts_                             => $i++,
 137         _rOpts_indent_columns_              => $i++,
 138         _rOpts_tabs_                        => $i++,
 139         _rOpts_entab_leading_whitespace_    => $i++,
 140         _rOpts_fixed_position_side_comment_ => $i++,
 141         _rOpts_minimum_space_to_comment_    => $i++,
 142         _rOpts_valign_code_                 => $i++,
 143         _rOpts_valign_block_comments_       => $i++,
 144         _rOpts_valign_side_comments_        => $i++,
 145
 146         _last_level_written_            => $i++,
 147         _last_side_comment_column_      => $i++,
 148         _last_side_comment_line_number_ => $i++,
 149         _last_side_comment_length_      => $i++,
 150         _last_side_comment_level_       => $i++,
 151         _outdented_line_count_          => $i++,
 152         _first_outdented_line_at_       => $i++,
 153         _last_outdented_line_at_        => $i++,
 154         _consecutive_block_comments_    => $i++,
 155
 156         _rgroup_lines_                => $i++,
 157         _group_level_                 => $i++,
 158         _group_type_                  => $i++,
 159         _group_maximum_line_length_   => $i++,
 160         _zero_count_                  => $i++,
 161         _last_leading_space_count_    => $i++,
 162         _comment_leading_space_count_ => $i++,
 163     };
 164
 165     # Debug flag. This is a relic from the original program development
 166     # looking for problems with tab characters.  Caution: this debug flag can
 167     # produce a lot of output It should be 0 except when debugging small
 168     # scripts.
 169
 170     use constant DEBUG_TABS => 0;
 171
 172     my $debug_warning = sub {
 173         print STDOUT "VALIGN_DEBUGGING with key $_[0]\n";
 174         return;
 175     };
 176
 177     DEBUG_TABS && $debug_warning->('TABS');
 178 }
 179
 180 # GLOBAL variables
 181 my (
 182
 183     %valign_control_hash,
 184     $valign_control_default,
 185
 186 );
 187
 188 sub check_options {
 189
 190     # This routine is called to check the user-supplied run parameters
 191     # and to configure the control hashes to them.
 192     my ($rOpts) = @_;
 193
 194     # All alignments are done by default
 195     %valign_control_hash    = ();
 196     $valign_control_default = 1;
 197
 198     # If -vil=s is entered without -vxl, assume -vxl='*'
 199     if (  !$rOpts->{'valign-exclusion-list'}
 200         && $rOpts->{'valign-inclusion-list'} )
 201     {
 202         $rOpts->{'valign-exclusion-list'} = '*';
 203     }
 204
 205     # See if the user wants to exclude any alignment types ...
 206     if ( $rOpts->{'valign-exclusion-list'} ) {
 207
 208         # The inclusion list is only relevant if there is an exclusion list
 209         if ( $rOpts->{'valign-inclusion-list'} ) {
 210             my @vil = split /\s+/, $rOpts->{'valign-inclusion-list'};
 211             @valign_control_hash{@vil} = (1) x scalar(@vil);
 212         }
 213
 214         # Note that the -vxl list is done after -vil, so -vxl has priority
 215         # in the event of duplicate entries.
 216         my @vxl = split /\s+/, $rOpts->{'valign-exclusion-list'};
 217         @valign_control_hash{@vxl} = (0) x scalar(@vxl);
 218
 219         # Optimization: revert to defaults if no exclusions.
 220         # This could happen with -vxl='  ' and any -vil list
 221         if ( !@vxl ) {
 222             %valign_control_hash = ();
 223         }
 224
 225         # '$valign_control_default' applies to types not in the hash:
 226         # - If a '*' was entered then set it to be that default type
 227         # - Otherwise, leave it set it to 1
 228         if ( defined( $valign_control_hash{'*'} ) ) {
 229             $valign_control_default = $valign_control_hash{'*'};
 230         }
 231
 232         # Side comments are controlled separately and must be removed
 233         # if given in a list.
 234         if (%valign_control_hash) {
 235             $valign_control_hash{'#'} = 1;
 236         }
 237     }
 238
 239     return;
 240 }
 241
 242 sub new {
 243
 244     my ( $class, @args ) = @_;
 245
 246     my %defaults = (
 247         rOpts              => undef,
 248         file_writer_object => undef,
 249         logger_object      => undef,
 250         diagnostics_object => undef,
 251         length_function    => sub { return length( $_[0] ) },
 252     );
 253     my %args = ( %defaults, @args );
 254
 255     # Initialize other caches and buffers
 256     initialize_step_B_cache();
 257     initialize_valign_buffer();
 258     initialize_leading_string_cache();
 259     initialize_decode();
 260     set_logger_object( $args{logger_object} );
 261
 262     # Initialize all variables in $self.
 263     # To add an item to $self, first define a new constant index in the BEGIN
 264     # section.
 265     my $self = [];
 266
 267     # objects
 268     $self->[_file_writer_object_] = $args{file_writer_object};
 269     $self->[_logger_object_]      = $args{logger_object};
 270     $self->[_diagnostics_object_] = $args{diagnostics_object};
 271     $self->[_length_function_]    = $args{length_function};
 272
 273     # shortcuts to user options
 274     my $rOpts = $args{rOpts};
 275
 276     $self->[_rOpts_]                = $rOpts;
 277     $self->[_rOpts_indent_columns_] = $rOpts->{'indent-columns'};
 278     $self->[_rOpts_tabs_]           = $rOpts->{'tabs'};
 279     $self->[_rOpts_entab_leading_whitespace_] =
 280       $rOpts->{'entab-leading-whitespace'};
 281     $self->[_rOpts_fixed_position_side_comment_] =
 282       $rOpts->{'fixed-position-side-comment'};
 283     $self->[_rOpts_minimum_space_to_comment_] =
 284       $rOpts->{'minimum-space-to-comment'};
 285     $self->[_rOpts_valign_code_]           = $rOpts->{'valign-code'};
 286     $self->[_rOpts_valign_block_comments_] = $rOpts->{'valign-block-comments'};
 287     $self->[_rOpts_valign_side_comments_]  = $rOpts->{'valign-side-comments'};
 288
 289     # Batch of lines being collected
 290     $self->[_rgroup_lines_]                = [];
 291     $self->[_group_level_]                 = 0;
 292     $self->[_group_type_]                  = EMPTY_STRING;
 293     $self->[_group_maximum_line_length_]   = undef;
 294     $self->[_zero_count_]                  = 0;
 295     $self->[_comment_leading_space_count_] = 0;
 296     $self->[_last_leading_space_count_]    = 0;
 297
 298     # Memory of what has been processed
 299     $self->[_last_level_written_]            = -1;
 300     $self->[_last_side_comment_column_]      = 0;
 301     $self->[_last_side_comment_line_number_] = 0;
 302     $self->[_last_side_comment_length_]      = 0;
 303     $self->[_last_side_comment_level_]       = -1;
 304     $self->[_outdented_line_count_]          = 0;
 305     $self->[_first_outdented_line_at_]       = 0;
 306     $self->[_last_outdented_line_at_]        = 0;
 307     $self->[_consecutive_block_comments_]    = 0;
 308
 309     bless $self, $class;
 310     return $self;
 311 }
 312
 313 #################################
 314 # CODE SECTION 2: Basic Utilities
 315 #################################
 316
 317 sub flush {
 318
 319     # flush() is the external call to completely empty the pipeline.
 320     my ($self) = @_;
 321
 322     # push things out the pipeline...
 323
 324     # push out any current group lines
 325     $self->_flush_group_lines();
 326
 327     # then anything left in the cache of step_B
 328     $self->_flush_cache();
 329
 330     # then anything left in the buffer of step_C
 331     $self->dump_valign_buffer();
 332
 333     return;
 334 }
 335
 336 sub initialize_for_new_group {
 337     my ($self) = @_;
 338
 339     $self->[_rgroup_lines_]                = [];
 340     $self->[_group_type_]                  = EMPTY_STRING;
 341     $self->[_zero_count_]                  = 0;
 342     $self->[_comment_leading_space_count_] = 0;
 343     $self->[_last_leading_space_count_]    = 0;
 344     $self->[_group_maximum_line_length_]   = undef;
 345
 346     # Note that the value for _group_level_ is
 347     # handled separately in sub valign_input
 348     return;
 349 }
 350
 351 sub group_line_count {
 352     return +@{ $_[0]->[_rgroup_lines_] };
 353 }
 354
 355 # interface to Perl::Tidy::Diagnostics routines
 356 # For debugging; not currently used
 357 sub write_diagnostics {
 358     my ( $self, $msg ) = @_;
 359     my $diagnostics_object = $self->[_diagnostics_object_];
 360     if ($diagnostics_object) {
 361         $diagnostics_object->write_diagnostics($msg);
 362     }
 363     return;
 364 }
 365
 366 {    ## begin closure for logger routines
 367     my $logger_object;
 368
 369     # Called once per file to initialize the logger object
 370     sub set_logger_object {
 371         $logger_object = shift;
 372         return;
 373     }
 374
 375     sub get_logger_object {
 376         return $logger_object;
 377     }
 378
 379     sub get_input_stream_name {
 380         my $input_stream_name = EMPTY_STRING;
 381         if ($logger_object) {
 382             $input_stream_name = $logger_object->get_input_stream_name();
 383         }
 384         return $input_stream_name;
 385     }
 386
 387     sub warning {
 388         my ($msg) = @_;
 389         if ($logger_object) {
 390             $logger_object->warning($msg);
 391         }
 392         return;
 393     }
 394
 395     sub write_logfile_entry {
 396         my ($msg) = @_;
 397         if ($logger_object) {
 398             $logger_object->write_logfile_entry($msg);
 399         }
 400         return;
 401     }
 402 }
 403
 404 sub get_cached_line_count {
 405     my $self = shift;
 406     return $self->group_line_count() + ( get_cached_line_type() ? 1 : 0 );
 407 }
 408
 409 sub get_recoverable_spaces {
 410
 411     # return the number of spaces (+ means shift right, - means shift left)
 412     # that we would like to shift a group of lines with the same indentation
 413     # to get them to line up with their opening parens
 414     my $indentation = shift;
 415     return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
 416 }
 417
 418 ######################################################
 419 # CODE SECTION 3: Code to accept input and form groups
 420 ######################################################
 421
 422 sub push_group_line {
 423
 424     my ( $self, $new_line ) = @_;
 425     my $rgroup_lines = $self->[_rgroup_lines_];
 426     push @{$rgroup_lines}, $new_line;
 427     return;
 428 }
 429
 430 use constant DEBUG_VALIGN      => 0;
 431 use constant SC_LONG_LINE_DIFF => 12;
 432
 433 my %is_closing_token;
 434
 435 BEGIN {
 436     my @q = qw< } ) ] >;
 437     @is_closing_token{@q} = (1) x scalar(@q);
 438 }
 439
 440 #--------------------------------------------
 441 # VTFLAGS: Vertical tightness types and flags
 442 #--------------------------------------------
 443 # Vertical tightness is controlled by a 'type' and associated 'flags' for each
 444 # line.  These values are set by sub Formatter::set_vertical_tightness_flags.
 445 # These are defined as follows:
 446
 447 # Vertical Tightness Line Type Codes:
 448 # Type 0, no vertical tightness condition
 449 # Type 1, last token of this line is a non-block opening token
 450 # Type 2, first token of next line is a non-block closing
 451 # Type 3, isolated opening block brace
 452 # type 4, isolated closing block brace
 453
 454 # Opening token flag values are the vertical tightness flags
 455 # 0 do not join with next line
 456 # 1 just one join per line
 457 # 2 any number of joins
 458
 459 # Closing token flag values indicate spacing:
 460 # 0 = no space added before closing token
 461 # 1 = single space added before closing token
 462
 463 sub valign_input {
 464
 465     # Place one line in the current vertical group.
 466     #
 467     # The key input parameters describing each line are:
 468     #     $level          = indentation level of this line
 469     #     $rfields        = ref to array of fields
 470     #     $rpatterns      = ref to array of patterns, one per field
 471     #     $rtokens        = ref to array of tokens starting fields 1,2,..
 472     #     $rfield_lengths = ref to array of field display widths
 473     #
 474     # Here is an example of what this package does.  In this example,
 475     # we are trying to line up both the '=>' and the '#'.
 476     #
 477     #         '18' => 'grave',    #   \`
 478     #         '19' => 'acute',    #   `'
 479     #         '20' => 'caron',    #   \v
 480     # <-tabs-><f1-><--field 2 ---><-f3->
 481     # |            |              |    |
 482     # |            |              |    |
 483     # col1        col2         col3 col4
 484     #
 485     # The calling routine has already broken the entire line into 3 fields as
 486     # indicated.  (So the work of identifying promising common tokens has
 487     # already been done).
 488     #
 489     # In this example, there will be 2 tokens being matched: '=>' and '#'.
 490     # They are the leading parts of fields 2 and 3, but we do need to know
 491     # what they are so that we can dump a group of lines when these tokens
 492     # change.
 493     #
 494     # The fields contain the actual characters of each field.  The patterns
 495     # are like the fields, but they contain mainly token types instead
 496     # of tokens, so they have fewer characters.  They are used to be
 497     # sure we are matching fields of similar type.
 498     #
 499     # In this example, there will be 4 column indexes being adjusted.  The
 500     # first one is always at zero.  The interior columns are at the start of
 501     # the matching tokens, and the last one tracks the maximum line length.
 502     #
 503     # Each time a new line comes in, it joins the current vertical
 504     # group if possible.  Otherwise it causes the current group to be flushed
 505     # and a new group is started.
 506     #
 507     # For each new group member, the column locations are increased, as
 508     # necessary, to make room for the new fields.  When the group is finally
 509     # output, these column numbers are used to compute the amount of spaces of
 510     # padding needed for each field.
 511     #
 512     # Programming note: the fields are assumed not to have any tab characters.
 513     # Tabs have been previously removed except for tabs in quoted strings and
 514     # side comments.  Tabs in these fields can mess up the column counting.
 515     # The log file warns the user if there are any such tabs.
 516
 517     my ( $self, $rline_hash ) = @_;
 518
 519     my $level                     = $rline_hash->{level};
 520     my $level_end                 = $rline_hash->{level_end};
 521     my $indentation               = $rline_hash->{indentation};
 522     my $list_seqno                = $rline_hash->{list_seqno};
 523     my $outdent_long_lines        = $rline_hash->{outdent_long_lines};
 524     my $is_terminal_ternary       = $rline_hash->{is_terminal_ternary};
 525     my $rvertical_tightness_flags = $rline_hash->{rvertical_tightness_flags};
 526     my $break_alignment_before    = $rline_hash->{break_alignment_before};
 527     my $break_alignment_after     = $rline_hash->{break_alignment_after};
 528     my $Kend                      = $rline_hash->{Kend};
 529     my $ci_level                  = $rline_hash->{ci_level};
 530     my $maximum_line_length       = $rline_hash->{maximum_line_length};
 531     my $forget_side_comment       = $rline_hash->{forget_side_comment};
 532     my $rline_alignment           = $rline_hash->{rline_alignment};
 533
 534     my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
 535       @{$rline_alignment};
 536
 537     # The index '$Kend' is a value which passed along with the line text to sub
 538     # 'write_code_line' for a convergence check.
 539
 540     # number of fields is $jmax
 541     # number of tokens between fields is $jmax-1
 542     my $jmax = @{$rfields} - 1;
 543
 544     my $leading_space_count =
 545       ref($indentation) ? $indentation->get_spaces() : $indentation;
 546
 547     # set outdented flag to be sure we either align within statements or
 548     # across statement boundaries, but not both.
 549     my $is_outdented =
 550       $self->[_last_leading_space_count_] > $leading_space_count;
 551     $self->[_last_leading_space_count_] = $leading_space_count;
 552
 553     # Identify a hanging side comment.  Hanging side comments have an empty
 554     # initial field.
 555     my $is_hanging_side_comment =
 556       ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
 557
 558     # Undo outdented flag for a hanging side comment
 559     $is_outdented = 0 if $is_hanging_side_comment;
 560
 561     # Identify a block comment.
 562     my $is_block_comment = $jmax == 0 && substr( $rfields->[0], 0, 1 ) eq '#';
 563
 564     # Block comment .. update count
 565     if ($is_block_comment) {
 566         $self->[_consecutive_block_comments_]++;
 567     }
 568
 569     # Not a block comment ..
 570     # Forget side comment column if we saw 2 or more block comments,
 571     # and reset the count
 572     else {
 573
 574         if ( $self->[_consecutive_block_comments_] > 1 ) {
 575             $self->forget_side_comment();
 576         }
 577         $self->[_consecutive_block_comments_] = 0;
 578     }
 579
 580     # Reset side comment location if we are entering a new block from level 0.
 581     # This is intended to keep them from drifting too far to the right.
 582     if ($forget_side_comment) {
 583         $self->forget_side_comment();
 584     }
 585
 586     my $is_balanced_line = $level_end == $level;
 587
 588     my $group_level               = $self->[_group_level_];
 589     my $group_maximum_line_length = $self->[_group_maximum_line_length_];
 590
 591     DEBUG_VALIGN && do {
 592         my $nlines = $self->group_line_count();
 593         print STDOUT
 594 "Entering valign_input: lines=$nlines new #fields= $jmax, leading_count=$leading_space_count, level=$level, group_level=$group_level, level_end=$level_end\n";
 595     };
 596
 597     # Validate cached line if necessary: If we can produce a container
 598     # with just 2 lines total by combining an existing cached opening
 599     # token with the closing token to follow, then we will mark both
 600     # cached flags as valid.
 601     my $cached_line_type = get_cached_line_type();
 602     if ($cached_line_type) {
 603         my $cached_line_opening_flag = get_cached_line_opening_flag();
 604         if ($rvertical_tightness_flags) {
 605             my $cached_seqno = get_cached_seqno();
 606             if (   $cached_seqno
 607                 && $rvertical_tightness_flags->{_vt_seqno}
 608                 && $rvertical_tightness_flags->{_vt_seqno} == $cached_seqno )
 609             {
 610
 611                 # Fix for b1187 and b1188: Normally this step is only done
 612                 # if the number of existing lines is 0 or 1.  But to prevent
 613                 # blinking, this range can be controlled by the caller.
 614                 # If zero values are given we fall back on the range 0 to 1.
 615                 my $line_count = $self->group_line_count();
 616                 my $min_lines  = $rvertical_tightness_flags->{_vt_min_lines};
 617                 my $max_lines  = $rvertical_tightness_flags->{_vt_max_lines};
 618                 $min_lines = 0 unless ($min_lines);
 619                 $max_lines = 1 unless ($max_lines);
 620                 if (   ( $line_count >= $min_lines )
 621                     && ( $line_count <= $max_lines ) )
 622                 {
 623                     $rvertical_tightness_flags->{_vt_valid_flag} ||= 1;
 624                     set_cached_line_valid(1);
 625                 }
 626             }
 627         }
 628
 629         # do not join an opening block brace (type 3, see VTFLAGS)
 630         # with an unbalanced line unless requested with a flag value of 2
 631         if (   $cached_line_type == 3
 632             && !$self->group_line_count()
 633             && $cached_line_opening_flag < 2
 634             && !$is_balanced_line )
 635         {
 636             set_cached_line_valid(0);
 637         }
 638     }
 639
 640     # shouldn't happen:
 641     if ( $level < 0 ) { $level = 0 }
 642
 643     # do not align code across indentation level changes
 644     # or changes in the maximum line length
 645     # or if vertical alignment is turned off
 646     if (
 647         $level != $group_level
 648         || (   $group_maximum_line_length
 649             && $maximum_line_length != $group_maximum_line_length )
 650         || $is_outdented
 651         || ( $is_block_comment && !$self->[_rOpts_valign_block_comments_] )
 652         || (   !$is_block_comment
 653             && !$self->[_rOpts_valign_side_comments_]
 654             && !$self->[_rOpts_valign_code_] )
 655       )
 656     {
 657
 658         $self->_flush_group_lines( $level - $group_level );
 659
 660         $group_level                         = $level;
 661         $self->[_group_level_]               = $group_level;
 662         $self->[_group_maximum_line_length_] = $maximum_line_length;
 663
 664         # Update leading spaces after the above flush because the leading space
 665         # count may have been changed if the -icp flag is in effect
 666         $leading_space_count =
 667           ref($indentation) ? $indentation->get_spaces() : $indentation;
 668     }
 669
 670     # --------------------------------------------------------------------
 671     # Collect outdentable block COMMENTS
 672     # --------------------------------------------------------------------
 673     my $is_blank_line = EMPTY_STRING;
 674     if ( $self->[_group_type_] eq 'COMMENT' ) {
 675         if (
 676             (
 677                    $is_block_comment
 678                 && $outdent_long_lines
 679                 && $leading_space_count ==
 680                 $self->[_comment_leading_space_count_]
 681             )
 682             || $is_blank_line
 683           )
 684         {
 685
 686             # Note that for a comment group we are not storing a line
 687             # but rather just the text and its length.
 688             $self->push_group_line(
 689                 [ $rfields->[0], $rfield_lengths->[0], $Kend ] );
 690             return;
 691         }
 692         else {
 693             $self->_flush_group_lines();
 694         }
 695     }
 696
 697     my $rgroup_lines = $self->[_rgroup_lines_];
 698     if ( $break_alignment_before && @{$rgroup_lines} ) {
 699         $rgroup_lines->[-1]->set_end_group(1);
 700     }
 701
 702     # --------------------------------------------------------------------
 703     # add dummy fields for terminal ternary
 704     # --------------------------------------------------------------------
 705     my $j_terminal_match;
 706
 707     if ( $is_terminal_ternary && @{$rgroup_lines} ) {
 708         $j_terminal_match =
 709           fix_terminal_ternary( $rgroup_lines->[-1], $rfields, $rtokens,
 710             $rpatterns, $rfield_lengths, $group_level, );
 711         $jmax = @{$rfields} - 1;
 712     }
 713
 714     # --------------------------------------------------------------------
 715     # add dummy fields for else statement
 716     # --------------------------------------------------------------------
 717
 718     # Note the trailing space after 'else' here. If there were no space between
 719     # the else and the next '{' then we would not be able to do vertical
 720     # alignment of the '{'.
 721     if (   $rfields->[0] eq 'else '
 722         && @{$rgroup_lines}
 723         && $is_balanced_line )
 724     {
 725
 726         $j_terminal_match =
 727           fix_terminal_else( $rgroup_lines->[-1], $rfields, $rtokens,
 728             $rpatterns, $rfield_lengths );
 729         $jmax = @{$rfields} - 1;
 730     }
 731
 732     # --------------------------------------------------------------------
 733     # Handle simple line of code with no fields to match.
 734     # --------------------------------------------------------------------
 735     if ( $jmax <= 0 ) {
 736         $self->[_zero_count_]++;
 737
 738         if ( @{$rgroup_lines}
 739             && !get_recoverable_spaces( $rgroup_lines->[0]->get_indentation() )
 740           )
 741         {
 742
 743             # flush the current group if it has some aligned columns..
 744             # or we haven't seen a comment lately
 745             if (   $rgroup_lines->[0]->get_jmax() > 1
 746                 || $self->[_zero_count_] > 3 )
 747             {
 748                 $self->_flush_group_lines();
 749             }
 750         }
 751
 752         # start new COMMENT group if this comment may be outdented
 753         if (   $is_block_comment
 754             && $outdent_long_lines
 755             && !$self->group_line_count() )
 756         {
 757             $self->[_group_type_]                  = 'COMMENT';
 758             $self->[_comment_leading_space_count_] = $leading_space_count;
 759             $self->[_group_maximum_line_length_]   = $maximum_line_length;
 760             $self->push_group_line(
 761                 [ $rfields->[0], $rfield_lengths->[0], $Kend ] );
 762             return;
 763         }
 764
 765         # just write this line directly if no current group, no side comment,
 766         # and no space recovery is needed.
 767         if (   !$self->group_line_count()
 768             && !get_recoverable_spaces($indentation) )
 769         {
 770
 771             $self->valign_output_step_B(
 772                 {
 773                     leading_space_count       => $leading_space_count,
 774                     line                      => $rfields->[0],
 775                     line_length               => $rfield_lengths->[0],
 776                     side_comment_length       => 0,
 777                     outdent_long_lines        => $outdent_long_lines,
 778                     rvertical_tightness_flags => $rvertical_tightness_flags,
 779                     level                     => $level,
 780                     level_end                 => $level_end,
 781                     Kend                      => $Kend,
 782                     maximum_line_length       => $maximum_line_length,
 783                 }
 784             );
 785             return;
 786         }
 787     }
 788     else {
 789         $self->[_zero_count_] = 0;
 790     }
 791
 792     # --------------------------------------------------------------------
 793     # It simplifies things to create a zero length side comment
 794     # if none exists.
 795     # --------------------------------------------------------------------
 796     if ( ( $jmax == 0 ) || ( $rtokens->[ $jmax - 1 ] ne '#' ) ) {
 797         $jmax += 1;
 798         $rtokens->[ $jmax - 1 ]  = '#';
 799         $rfields->[$jmax]        = EMPTY_STRING;
 800         $rfield_lengths->[$jmax] = 0;
 801         $rpatterns->[$jmax]      = '#';
 802     }
 803
 804     # --------------------------------------------------------------------
 805     # create an object to hold this line
 806     # --------------------------------------------------------------------
 807     my $new_line = Perl::Tidy::VerticalAligner::Line->new(
 808         {
 809             jmax                      => $jmax,
 810             rtokens                   => $rtokens,
 811             rfields                   => $rfields,
 812             rpatterns                 => $rpatterns,
 813             rfield_lengths            => $rfield_lengths,
 814             indentation               => $indentation,
 815             leading_space_count       => $leading_space_count,
 816             outdent_long_lines        => $outdent_long_lines,
 817             list_seqno                => $list_seqno,
 818             list_type                 => EMPTY_STRING,
 819             is_hanging_side_comment   => $is_hanging_side_comment,
 820             rvertical_tightness_flags => $rvertical_tightness_flags,
 821             is_terminal_ternary       => $is_terminal_ternary,
 822             j_terminal_match          => $j_terminal_match,
 823             end_group                 => $break_alignment_after,
 824             Kend                      => $Kend,
 825             ci_level                  => $ci_level,
 826             level                     => $level,
 827             level_end                 => $level_end,
 828             imax_pair                 => -1,
 829             maximum_line_length       => $maximum_line_length,
 830         }
 831     );
 832
 833     # --------------------------------------------------------------------
 834     # Decide if this is a simple list of items.
 835     # We use this to be less restrictive in deciding what to align.
 836     # --------------------------------------------------------------------
 837     decide_if_list($new_line) if ($list_seqno);
 838
 839     # --------------------------------------------------------------------
 840     # Append this line to the current group (or start new group)
 841     # --------------------------------------------------------------------
 842
 843     $self->push_group_line($new_line);
 844     $self->[_group_maximum_line_length_] = $maximum_line_length;
 845
 846     # output this group if it ends in a terminal else or ternary line
 847     if ( defined($j_terminal_match) ) {
 848         $self->_flush_group_lines();
 849     }
 850
 851     # Force break after jump to lower level
 852     elsif ($level_end < $level
 853         || $is_closing_token{ substr( $rfields->[0], 0, 1 ) } )
 854     {
 855         $self->_flush_group_lines(-1);
 856     }
 857
 858     # --------------------------------------------------------------------
 859     # Some old debugging stuff
 860     # --------------------------------------------------------------------
 861     DEBUG_VALIGN && do {
 862         print STDOUT "exiting valign_input fields:";
 863         dump_array( @{$rfields} );
 864         print STDOUT "exiting valign_input tokens:";
 865         dump_array( @{$rtokens} );
 866         print STDOUT "exiting valign_input patterns:";
 867         dump_array( @{$rpatterns} );
 868     };
 869
 870     return;
 871 }
 872
 873 sub join_hanging_comment {
 874
 875     # Add dummy fields to a hanging side comment to make it look
 876     # like the first line in its potential group.  This simplifies
 877     # the coding.
 878     my ( $new_line, $old_line ) = @_;
 879
 880     my $jmax = $new_line->get_jmax();
 881
 882     # must be 2 fields
 883     return 0 unless $jmax == 1;
 884     my $rtokens = $new_line->get_rtokens();
 885
 886     # the second field must be a comment
 887     return 0 unless $rtokens->[0] eq '#';
 888     my $rfields = $new_line->get_rfields();
 889
 890     # the first field must be empty
 891     return 0 unless $rfields->[0] =~ /^\s*$/;
 892
 893     # the current line must have fewer fields
 894     my $maximum_field_index = $old_line->get_jmax();
 895     return 0
 896       unless $maximum_field_index > $jmax;
 897
 898     # looks ok..
 899     my $rpatterns      = $new_line->get_rpatterns();
 900     my $rfield_lengths = $new_line->get_rfield_lengths();
 901
 902     $new_line->set_is_hanging_side_comment(1);
 903     $jmax = $maximum_field_index;
 904     $new_line->set_jmax($jmax);
 905     $rfields->[$jmax]         = $rfields->[1];
 906     $rfield_lengths->[$jmax]  = $rfield_lengths->[1];
 907     $rtokens->[ $jmax - 1 ]   = $rtokens->[0];
 908     $rpatterns->[ $jmax - 1 ] = $rpatterns->[0];
 909     foreach my $j ( 1 .. $jmax - 1 ) {
 910         $rfields->[$j]         = EMPTY_STRING;
 911         $rfield_lengths->[$j]  = 0;
 912         $rtokens->[ $j - 1 ]   = EMPTY_STRING;
 913         $rpatterns->[ $j - 1 ] = EMPTY_STRING;
 914     }
 915     return 1;
 916 }
 917
 918 {    ## closure for sub decide_if_list
 919
 920     my %is_comma_token;
 921
 922     BEGIN {
 923
 924         my @q = qw( => );
 925         push @q, ',';
 926         @is_comma_token{@q} = (1) x scalar(@q);
 927     }
 928
 929     sub decide_if_list {
 930
 931         my $line = shift;
 932
 933         # A list will be taken to be a line with a forced break in which all
 934         # of the field separators are commas or comma-arrows (except for the
 935         # trailing #)
 936
 937         my $rtokens    = $line->get_rtokens();
 938         my $test_token = $rtokens->[0];
 939         my ( $raw_tok, $lev, $tag, $tok_count ) =
 940           decode_alignment_token($test_token);
 941         if ( $is_comma_token{$raw_tok} ) {
 942             my $list_type = $test_token;
 943             my $jmax      = $line->get_jmax();
 944
 945             foreach ( 1 .. $jmax - 2 ) {
 946                 ( $raw_tok, $lev, $tag, $tok_count ) =
 947                   decode_alignment_token( $rtokens->[$_] );
 948                 if ( !$is_comma_token{$raw_tok} ) {
 949                     $list_type = EMPTY_STRING;
 950                     last;
 951                 }
 952             }
 953             $line->set_list_type($list_type);
 954         }
 955         return;
 956     }
 957 }
 958
 959 sub fix_terminal_ternary {
 960
 961     # Add empty fields as necessary to align a ternary term
 962     # like this:
 963     #
 964     #  my $leapyear =
 965     #      $year % 4   ? 0
 966     #    : $year % 100 ? 1
 967     #    : $year % 400 ? 0
 968     #    :               1;
 969     #
 970     # returns the index of the terminal question token, if any
 971
 972     my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths,
 973         $group_level )
 974       = @_;
 975
 976     return unless ($old_line);
 977     use constant EXPLAIN_TERNARY => 0;
 978
 979     if (%valign_control_hash) {
 980         my $align_ok = $valign_control_hash{'?'};
 981         $align_ok = $valign_control_default unless defined($align_ok);
 982         return unless ($align_ok);
 983     }
 984
 985     my $jmax        = @{$rfields} - 1;
 986     my $rfields_old = $old_line->get_rfields();
 987
 988     my $rpatterns_old       = $old_line->get_rpatterns();
 989     my $rtokens_old         = $old_line->get_rtokens();
 990     my $maximum_field_index = $old_line->get_jmax();
 991
 992     # look for the question mark after the :
 993     my ($jquestion);
 994     my $depth_question;
 995     my $pad        = EMPTY_STRING;
 996     my $pad_length = 0;
 997     foreach my $j ( 0 .. $maximum_field_index - 1 ) {
 998         my $tok = $rtokens_old->[$j];
 999         my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
1000         if ( $raw_tok eq '?' ) {
1001             $depth_question = $lev;
1002
1003             # depth must be correct
1004             next unless ( $depth_question eq $group_level );
1005
1006             $jquestion = $j;
1007             if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
1008                 $pad_length = length($1);
1009                 $pad        = SPACE x $pad_length;
1010             }
1011             else {
1012                 return;    # shouldn't happen
1013             }
1014             last;
1015         }
1016     }
1017     return unless ( defined($jquestion) );    # shouldn't happen
1018
1019     # Now splice the tokens and patterns of the previous line
1020     # into the else line to insure a match.  Add empty fields
1021     # as necessary.
1022     my $jadd = $jquestion;
1023
1024     # Work on copies of the actual arrays in case we have
1025     # to return due to an error
1026     my @fields        = @{$rfields};
1027     my @patterns      = @{$rpatterns};
1028     my @tokens        = @{$rtokens};
1029     my @field_lengths = @{$rfield_lengths};
1030
1031     EXPLAIN_TERNARY && do {
1032         local $LIST_SEPARATOR = '><';
1033         print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n";
1034         print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n";
1035         print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
1036         print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n";
1037         print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n";
1038         print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
1039     };
1040
1041     # handle cases of leading colon on this line
1042     if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
1043
1044         my ( $colon, $therest ) = ( $1, $2 );
1045
1046         # Handle sub-case of first field with leading colon plus additional code
1047         # This is the usual situation as at the '1' below:
1048         #  ...
1049         #  : $year % 400 ? 0
1050         #  :               1;
1051         if ($therest) {
1052
1053             # Split the first field after the leading colon and insert padding.
1054             # Note that this padding will remain even if the terminal value goes
1055             # out on a separate line.  This does not seem to look to bad, so no
1056             # mechanism has been included to undo it.
1057             my $field1        = shift @fields;
1058             my $field_length1 = shift @field_lengths;
1059             my $len_colon     = length($colon);
1060             unshift @fields, ( $colon, $pad . $therest );
1061             unshift @field_lengths,
1062               ( $len_colon, $pad_length + $field_length1 - $len_colon );
1063
1064             # change the leading pattern from : to ?
1065             return unless ( $patterns[0] =~ s/^\:/?/ );
1066
1067             # install leading tokens and patterns of existing line
1068             unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
1069             unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
1070
1071             # insert appropriate number of empty fields
1072             splice( @fields,        1, 0, (EMPTY_STRING) x $jadd ) if $jadd;
1073             splice( @field_lengths, 1, 0, (0) x $jadd )            if $jadd;
1074         }
1075
1076         # handle sub-case of first field just equal to leading colon.
1077         # This can happen for example in the example below where
1078         # the leading '(' would create a new alignment token
1079         # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
1080         # :                        ( $mname = $name . '->' );
1081         else {
1082
1083             return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
1084
1085             # prepend a leading ? onto the second pattern
1086             $patterns[1] = "?b" . $patterns[1];
1087
1088             # pad the second field
1089             $fields[1]        = $pad . $fields[1];
1090             $field_lengths[1] = $pad_length + $field_lengths[1];
1091
1092             # install leading tokens and patterns of existing line, replacing
1093             # leading token and inserting appropriate number of empty fields
1094             splice( @tokens,   0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
1095             splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
1096             splice( @fields,        1, 0, (EMPTY_STRING) x $jadd ) if $jadd;
1097             splice( @field_lengths, 1, 0, (0) x $jadd )            if $jadd;
1098         }
1099     }
1100
1101     # Handle case of no leading colon on this line.  This will
1102     # be the case when -wba=':' is used.  For example,
1103     #  $year % 400 ? 0 :
1104     #                1;
1105     else {
1106
1107         # install leading tokens and patterns of existing line
1108         $patterns[0] = '?' . 'b' . $patterns[0];
1109         unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
1110         unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
1111
1112         # insert appropriate number of empty fields
1113         $jadd             = $jquestion + 1;
1114         $fields[0]        = $pad . $fields[0];
1115         $field_lengths[0] = $pad_length + $field_lengths[0];
1116         splice( @fields,        0, 0, (EMPTY_STRING) x $jadd ) if $jadd;
1117         splice( @field_lengths, 0, 0, (0) x $jadd )            if $jadd;
1118     }
1119
1120     EXPLAIN_TERNARY && do {
1121         local $LIST_SEPARATOR = '><';
1122         print STDOUT "MODIFIED TOKENS=<@tokens>\n";
1123         print STDOUT "MODIFIED PATTERNS=<@patterns>\n";
1124         print STDOUT "MODIFIED FIELDS=<@fields>\n";
1125     };
1126
1127     # all ok .. update the arrays
1128     @{$rfields}        = @fields;
1129     @{$rtokens}        = @tokens;
1130     @{$rpatterns}      = @patterns;
1131     @{$rfield_lengths} = @field_lengths;
1132
1133     # force a flush after this line
1134     return $jquestion;
1135 }
1136
1137 sub fix_terminal_else {
1138
1139     # Add empty fields as necessary to align a balanced terminal
1140     # else block to a previous if/elsif/unless block,
1141     # like this:
1142     #
1143     #  if   ( 1 || $x ) { print "ok 13\n"; }
1144     #  else             { print "not ok 13\n"; }
1145     #
1146     # returns a positive value if the else block should be indented
1147     #
1148     my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths ) = @_;
1149
1150     return unless ($old_line);
1151     my $jmax = @{$rfields} - 1;
1152     return unless ( $jmax > 0 );
1153
1154     if (%valign_control_hash) {
1155         my $align_ok = $valign_control_hash{'{'};
1156         $align_ok = $valign_control_default unless defined($align_ok);
1157         return unless ($align_ok);
1158     }
1159
1160     # check for balanced else block following if/elsif/unless
1161     my $rfields_old = $old_line->get_rfields();
1162
1163     # TBD: add handling for 'case'
1164     return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
1165
1166     # look for the opening brace after the else, and extract the depth
1167     my $tok_brace = $rtokens->[0];
1168     my $depth_brace;
1169     if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
1170
1171     # probably:  "else # side_comment"
1172     else { return }
1173
1174     my $rpatterns_old       = $old_line->get_rpatterns();
1175     my $rtokens_old         = $old_line->get_rtokens();
1176     my $maximum_field_index = $old_line->get_jmax();
1177
1178     # be sure the previous if/elsif is followed by an opening paren
1179     my $jparen    = 0;
1180     my $tok_paren = '(' . $depth_brace;
1181     my $tok_test  = $rtokens_old->[$jparen];
1182     return unless ( $tok_test eq $tok_paren );    # shouldn't happen
1183
1184     # Now find the opening block brace
1185     my ($jbrace);
1186     foreach my $j ( 1 .. $maximum_field_index - 1 ) {
1187         my $tok = $rtokens_old->[$j];
1188         if ( $tok eq $tok_brace ) {
1189             $jbrace = $j;
1190             last;
1191         }
1192     }
1193     return unless ( defined($jbrace) );           # shouldn't happen
1194
1195     # Now splice the tokens and patterns of the previous line
1196     # into the else line to insure a match.  Add empty fields
1197     # as necessary.
1198     my $jadd = $jbrace - $jparen;
1199     splice( @{$rtokens},   0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
1200     splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
1201     splice( @{$rfields},        1, 0, (EMPTY_STRING) x $jadd );
1202     splice( @{$rfield_lengths}, 1, 0, (0) x $jadd );
1203
1204     # force a flush after this line if it does not follow a case
1205     if   ( $rfields_old->[0] =~ /^case\s*$/ ) { return }
1206     else                                      { return $jbrace }
1207 }
1208
1209 my %is_closing_block_type;
1210
1211 BEGIN {
1212     @_ = qw< } ] >;
1213     @is_closing_block_type{@_} = (1) x scalar(@_);
1214 }
1215
1216 sub check_match {
1217
1218     # See if the current line matches the current vertical alignment group.
1219
1220     my ( $self, $new_line, $base_line, $prev_line ) = @_;
1221
1222     # Given:
1223     #  $new_line  = the line being considered for group inclusion
1224     #  $base_line = the first line of the current group
1225     #  $prev_line = the line just before $new_line
1226
1227     # returns a flag and a value as follows:
1228     #    return (0, $imax_align)     if the line does not match
1229     #    return (1, $imax_align)     if the line matches but does not fit
1230     #    return (2, $imax_align)     if the line matches and fits
1231
1232     # Returns '$imax_align' which is the index of the maximum matching token.
1233     # It will be used in the subsequent left-to-right sweep to align as many
1234     # tokens as possible for lines which partially match.
1235     my $imax_align = -1;
1236
1237     # variable $GoToMsg explains reason for no match, for debugging
1238     my $GoToMsg = EMPTY_STRING;
1239     use constant EXPLAIN_CHECK_MATCH => 0;
1240
1241     # This is a flag for testing alignment by sub sweep_left_to_right only.
1242     # This test can help find problems with the alignment logic.
1243     # This flag should normally be zero.
1244     use constant TEST_SWEEP_ONLY => 0;
1245
1246     my $jmax                = $new_line->get_jmax();
1247     my $maximum_field_index = $base_line->get_jmax();
1248
1249     my $jlimit = $jmax - 2;
1250     if ( $jmax > $maximum_field_index ) {
1251         $jlimit = $maximum_field_index - 2;
1252     }
1253
1254     if ( $new_line->get_is_hanging_side_comment() ) {
1255
1256         # HSC's can join the group if they fit
1257     }
1258
1259     # Everything else
1260     else {
1261
1262         # A group with hanging side comments ends with the first non hanging
1263         # side comment.
1264         if ( $base_line->get_is_hanging_side_comment() ) {
1265             $GoToMsg = "end of hanging side comments";
1266             goto NO_MATCH;
1267         }
1268
1269         # The number of tokens that this line shares with the previous line
1270         # has been stored with the previous line.  This value was calculated
1271         # and stored by sub 'match_line_pair'.
1272         $imax_align = $prev_line->get_imax_pair();
1273
1274         if ( $imax_align != $jlimit ) {
1275             $GoToMsg = "Not all tokens match: $imax_align != $jlimit\n";
1276             goto NO_MATCH;
1277         }
1278
1279     }
1280
1281     # The tokens match, but the lines must have identical number of
1282     # tokens to join the group.
1283     if ( $maximum_field_index != $jmax ) {
1284         $GoToMsg = "token count differs";
1285         goto NO_MATCH;
1286     }
1287
1288     # The tokens match. Now See if there is space for this line in the
1289     # current group.
1290     if ( $self->check_fit( $new_line, $base_line ) && !TEST_SWEEP_ONLY ) {
1291
1292         EXPLAIN_CHECK_MATCH
1293           && print "match and fit, imax_align=$imax_align, jmax=$jmax\n";
1294         return ( 2, $jlimit );
1295     }
1296     else {
1297
1298         EXPLAIN_CHECK_MATCH
1299           && print "match but no fit, imax_align=$imax_align, jmax=$jmax\n";
1300         return ( 1, $jlimit );
1301     }
1302
1303   NO_MATCH:
1304
1305     EXPLAIN_CHECK_MATCH
1306       && print
1307       "no match because $GoToMsg, max match index =i $imax_align, jmax=$jmax\n";
1308
1309     return ( 0, $imax_align );
1310 }
1311
1312 sub check_fit {
1313
1314     my ( $self, $new_line, $old_line ) = @_;
1315
1316     # The new line has alignments identical to the current group. Now we have
1317     # to fit the new line into the group without causing a field to exceed the
1318     # line length limit.
1319     #   return true if successful
1320     #   return false if not successful
1321
1322     my $jmax                = $new_line->get_jmax();
1323     my $leading_space_count = $new_line->get_leading_space_count();
1324     my $rfield_lengths      = $new_line->get_rfield_lengths();
1325     my $padding_available   = $old_line->get_available_space_on_right();
1326     my $jmax_old            = $old_line->get_jmax();
1327     my $rtokens_old         = $old_line->get_rtokens();
1328
1329     # Safety check ... only lines with equal array sizes should arrive here
1330     # from sub check_match.  So if this error occurs, look at recent changes in
1331     # sub check_match.  It is only supposed to check the fit of lines with
1332     # identical numbers of alignment tokens.
1333     if ( $jmax_old ne $jmax ) {
1334
1335         warning(<<EOM);
1336 Program bug detected in Perl::Tidy::VerticalAligner sub check_fit
1337 unexpected difference in array lengths: $jmax != $jmax_old
1338 EOM
1339         return;
1340     }
1341
1342     # Save current columns in case this line does not fit.
1343     my @alignments = $old_line->get_alignments();
1344     foreach my $alignment (@alignments) {
1345         $alignment->save_column();
1346     }
1347
1348     my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
1349
1350     # Loop over all alignments ...
1351     my $maximum_field_index = $old_line->get_jmax();
1352     for my $j ( 0 .. $jmax ) {
1353
1354         my $pad = $rfield_lengths->[$j] - $old_line->current_field_width($j);
1355
1356         if ( $j == 0 ) {
1357             $pad += $leading_space_count;
1358         }
1359
1360         # Keep going if this field does not need any space.
1361         next if ( $pad < 0 );
1362
1363         # Revert to the starting state if does not fit
1364         if ( $pad > $padding_available ) {
1365
1366             ################################################
1367             # Line does not fit -- revert to starting state
1368             ################################################
1369             foreach my $alignment (@alignments) {
1370                 $alignment->restore_column();
1371             }
1372             return;
1373         }
1374
1375         # make room for this field
1376         $old_line->increase_field_width( $j, $pad );
1377         $padding_available -= $pad;
1378     }
1379
1380     ######################################
1381     # The line fits, the match is accepted
1382     ######################################
1383     return 1;
1384
1385 }
1386
1387 sub install_new_alignments {
1388
1389     my ($new_line) = @_;
1390
1391     my $jmax           = $new_line->get_jmax();
1392     my $rfield_lengths = $new_line->get_rfield_lengths();
1393     my $col            = $new_line->get_leading_space_count();
1394
1395     for my $j ( 0 .. $jmax ) {
1396         $col += $rfield_lengths->[$j];
1397
1398         # create initial alignments for the new group
1399         my $alignment =
1400           Perl::Tidy::VerticalAligner::Alignment->new( { column => $col } );
1401         $new_line->set_alignment( $j, $alignment );
1402     }
1403     return;
1404 }
1405
1406 sub copy_old_alignments {
1407     my ( $new_line, $old_line ) = @_;
1408     my @new_alignments = $old_line->get_alignments();
1409     $new_line->set_alignments(@new_alignments);
1410     return;
1411 }
1412
1413 sub dump_array {
1414
1415     # debug routine to dump array contents
1416     local $LIST_SEPARATOR = ')(';
1417     print STDOUT "(@_)\n";
1418     return;
1419 }
1420
1421 sub level_change {
1422
1423     # compute decrease in level when we remove $diff spaces from the
1424     # leading spaces
1425     my ( $self, $leading_space_count, $diff, $level ) = @_;
1426
1427     my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
1428     if ($rOpts_indent_columns) {
1429         my $olev =
1430           int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
1431         my $nlev = int( $leading_space_count / $rOpts_indent_columns );
1432         $level -= ( $olev - $nlev );
1433         if ( $level < 0 ) { $level = 0 }
1434     }
1435     return $level;
1436 }
1437
1438 ###############################################
1439 # CODE SECTION 4: Code to process comment lines
1440 ###############################################
1441
1442 sub _flush_comment_lines {
1443
1444     # Output a group consisting of COMMENT lines
1445
1446     my ($self) = @_;
1447     my $rgroup_lines = $self->[_rgroup_lines_];
1448     return unless ( @{$rgroup_lines} );
1449     my $group_level               = $self->[_group_level_];
1450     my $group_maximum_line_length = $self->[_group_maximum_line_length_];
1451     my $leading_space_count       = $self->[_comment_leading_space_count_];
1452     my $leading_string =
1453       $self->get_leading_string( $leading_space_count, $group_level );
1454
1455     # look for excessively long lines
1456     my $max_excess = 0;
1457     foreach my $item ( @{$rgroup_lines} ) {
1458         my ( $str, $str_len ) = @{$item};
1459         my $excess =
1460           $str_len + $leading_space_count - $group_maximum_line_length;
1461         if ( $excess > $max_excess ) {
1462             $max_excess = $excess;
1463         }
1464     }
1465
1466     # zero leading space count if any lines are too long
1467     if ( $max_excess > 0 ) {
1468         $leading_space_count -= $max_excess;
1469         if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
1470         my $file_writer_object = $self->[_file_writer_object_];
1471         my $last_outdented_line_at =
1472           $file_writer_object->get_output_line_number();
1473         my $nlines = @{$rgroup_lines};
1474         $self->[_last_outdented_line_at_] =
1475           $last_outdented_line_at + $nlines - 1;
1476         my $outdented_line_count = $self->[_outdented_line_count_];
1477         unless ($outdented_line_count) {
1478             $self->[_first_outdented_line_at_] = $last_outdented_line_at;
1479         }
1480         $outdented_line_count += $nlines;
1481         $self->[_outdented_line_count_] = $outdented_line_count;
1482     }
1483
1484     # write the lines
1485     my $outdent_long_lines = 0;
1486
1487     foreach my $item ( @{$rgroup_lines} ) {
1488         my ( $str, $str_len, $Kend ) = @{$item};
1489         $self->valign_output_step_B(
1490             {
1491                 leading_space_count       => $leading_space_count,
1492                 line                      => $str,
1493                 line_length               => $str_len,
1494                 side_comment_length       => 0,
1495                 outdent_long_lines        => $outdent_long_lines,
1496                 rvertical_tightness_flags => undef,
1497                 level                     => $group_level,
1498                 level_end                 => $group_level,
1499                 Kend                      => $Kend,
1500                 maximum_line_length       => $group_maximum_line_length,
1501             }
1502         );
1503     }
1504
1505     $self->initialize_for_new_group();
1506     return;
1507 }
1508
1509 ######################################################
1510 # CODE SECTION 5: Code to process groups of code lines
1511 ######################################################
1512
1513 sub _flush_group_lines {
1514
1515     # This is the vertical aligner internal flush, which leaves the cache
1516     # intact
1517     my ( $self, $level_jump ) = @_;
1518
1519     # $level_jump = $next_level-$group_level, if known
1520     #             = undef if not known
1521     # Note: only the sign of the jump is needed
1522
1523     my $rgroup_lines = $self->[_rgroup_lines_];
1524     return unless ( @{$rgroup_lines} );
1525     my $group_type  = $self->[_group_type_];
1526     my $group_level = $self->[_group_level_];
1527
1528     # Debug
1529     0 && do {
1530         my ( $a, $b, $c ) = caller();
1531         my $nlines = @{$rgroup_lines};
1532         print STDOUT
1533 "APPEND0: _flush_group_lines called from $a $b $c lines=$nlines, type=$group_type \n";
1534     };
1535
1536     ############################################
1537     # Section 1: Handle a group of COMMENT lines
1538     ############################################
1539     if ( $group_type eq 'COMMENT' ) {
1540         $self->_flush_comment_lines();
1541         return;
1542     }
1543
1544     #########################################################################
1545     # Section 2: Handle line(s) of CODE.  Most of the actual work of vertical
1546     # aligning happens here in the following steps:
1547     #########################################################################
1548
1549     # STEP 1: Remove most unmatched tokens. They block good alignments.
1550     my ( $max_lev_diff, $saw_side_comment ) =
1551       delete_unmatched_tokens( $rgroup_lines, $group_level );
1552
1553     # STEP 2: Sweep top to bottom, forming subgroups of lines with exactly
1554     # matching common alignments.  The indexes of these subgroups are in the
1555     # return variable.
1556     my $rgroups = $self->sweep_top_down( $rgroup_lines, $group_level );
1557
1558     # STEP 3: Sweep left to right through the lines, looking for leading
1559     # alignment tokens shared by groups.
1560     sweep_left_to_right( $rgroup_lines, $rgroups, $group_level )
1561       if ( @{$rgroups} > 1 );
1562
1563     # STEP 4: Move side comments to a common column if possible.
1564     if ($saw_side_comment) {
1565         $self->align_side_comments( $rgroup_lines, $rgroups );
1566     }
1567
1568     # STEP 5: For the -lp option, increase the indentation of lists
1569     # to the desired amount, but do not exceed the line length limit.
1570
1571     # We are allowed to shift a group of lines to the right if:
1572     #  (1) its level is greater than the level of the previous group, and
1573     #  (2) its level is greater than the level of the next line to be written.
1574
1575     my $extra_indent_ok;
1576     if ( $group_level > $self->[_last_level_written_] ) {
1577
1578         # Use the level jump to next line to come, if given
1579         if ( defined($level_jump) ) {
1580             $extra_indent_ok = $level_jump < 0;
1581         }
1582
1583         # Otherwise, assume the next line has the level of the end of last line.
1584         # This fixes case c008.
1585         else {
1586             my $level_end = $rgroup_lines->[-1]->get_level_end();
1587             $extra_indent_ok = $group_level > $level_end;
1588         }
1589     }
1590
1591     my $extra_leading_spaces =
1592       $extra_indent_ok
1593       ? get_extra_leading_spaces( $rgroup_lines, $rgroups )
1594       : 0;
1595
1596     # STEP 6: Output the lines.
1597     # All lines in this group have the same leading spacing and maximum line
1598     # length
1599     my $group_leader_length = $rgroup_lines->[0]->get_leading_space_count();
1600     my $group_maximum_line_length =
1601       $rgroup_lines->[0]->get_maximum_line_length();
1602
1603     foreach my $line ( @{$rgroup_lines} ) {
1604         $self->valign_output_step_A(
1605             {
1606                 line                 => $line,
1607                 min_ci_gap           => 0,
1608                 do_not_align         => 0,
1609                 group_leader_length  => $group_leader_length,
1610                 extra_leading_spaces => $extra_leading_spaces,
1611                 level                => $group_level,
1612                 maximum_line_length  => $group_maximum_line_length,
1613             }
1614         );
1615     }
1616
1617     # Let the formatter know that this object has been processed and any
1618     # recoverable spaces have been handled.  This is needed for setting the
1619     # closing paren location in -lp mode.
1620     my $object = $rgroup_lines->[0]->get_indentation();
1621     if ( ref($object) ) { $object->set_recoverable_spaces(0) }
1622
1623     $self->initialize_for_new_group();
1624     return;
1625 }
1626
1627 {    ## closure for sub sweep_top_down
1628
1629     my $rall_lines;         # all of the lines
1630     my $grp_level;          # level of all lines
1631     my $rgroups;            # describes the partition of lines we will make here
1632     my $group_line_count;   # number of lines in current partition
1633
1634     BEGIN { $rgroups = [] }
1635
1636     sub initialize_for_new_rgroup {
1637         $group_line_count = 0;
1638         return;
1639     }
1640
1641     sub add_to_rgroup {
1642
1643         my ($jend) = @_;
1644         my $rline = $rall_lines->[$jend];
1645
1646         my $jbeg = $jend;
1647         if ( $group_line_count == 0 ) {
1648             install_new_alignments($rline);
1649         }
1650         else {
1651             my $rvals = pop @{$rgroups};
1652             $jbeg = $rvals->[0];
1653             copy_old_alignments( $rline, $rall_lines->[$jbeg] );
1654         }
1655         push @{$rgroups}, [ $jbeg, $jend, undef ];
1656         $group_line_count++;
1657         return;
1658     }
1659
1660     sub get_rgroup_jrange {
1661
1662         return unless @{$rgroups};
1663         return unless ( $group_line_count > 0 );
1664         my ( $jbeg, $jend ) = @{ $rgroups->[-1] };
1665         return ( $jbeg, $jend );
1666     }
1667
1668     sub end_rgroup {
1669
1670         my ($imax_align) = @_;
1671         return unless @{$rgroups};
1672         return unless ( $group_line_count > 0 );
1673
1674         my ( $jbeg, $jend ) = @{ pop @{$rgroups} };
1675         push @{$rgroups}, [ $jbeg, $jend, $imax_align ];
1676
1677         # Undo some alignments of poor two-line combinations.
1678         # We had to wait until now to know the line count.
1679         if ( $jend - $jbeg == 1 ) {
1680             my $line_0 = $rall_lines->[$jbeg];
1681             my $line_1 = $rall_lines->[$jend];
1682
1683             my $imax_pair = $line_1->get_imax_pair();
1684             if ( $imax_pair > $imax_align ) { $imax_align = $imax_pair }
1685
1686             ## flag for possible future use:
1687             ## my $is_isolated_pair = $imax_pair < 0
1688             ##  && ( $jbeg == 0
1689             ##    || $rall_lines->[ $jbeg - 1 ]->get_imax_pair() < 0 );
1690
1691             my $imax_prev =
1692               $jbeg > 0 ? $rall_lines->[ $jbeg - 1 ]->get_imax_pair() : -1;
1693
1694             my ( $is_marginal, $imax_align_fix ) =
1695               is_marginal_match( $line_0, $line_1, $grp_level, $imax_align,
1696                 $imax_prev );
1697             if ($is_marginal) {
1698                 combine_fields( $line_0, $line_1, $imax_align_fix );
1699             }
1700         }
1701
1702         initialize_for_new_rgroup();
1703         return;
1704     }
1705
1706     sub block_penultimate_match {
1707
1708         # emergency reset to prevent sweep_left_to_right from trying to match a
1709         # failed terminal else match
1710         return unless @{$rgroups} > 1;
1711         $rgroups->[-2]->[2] = -1;
1712         return;
1713     }
1714
1715     sub sweep_top_down {
1716         my ( $self, $rlines, $group_level ) = @_;
1717
1718         # Partition the set of lines into final alignment subgroups
1719         # and store the alignments with the lines.
1720
1721         # The alignment subgroups we are making here are groups of consecutive
1722         # lines which have (1) identical alignment tokens and (2) do not
1723         # exceed the allowable maximum line length.  A later sweep from
1724         # left-to-right ('sweep_lr') will handle additional alignments.
1725
1726         # transfer args to closure variables
1727         $rall_lines = $rlines;
1728         $grp_level  = $group_level;
1729         $rgroups    = [];
1730         initialize_for_new_rgroup();
1731         return unless @{$rlines};    # shouldn't happen
1732
1733         # Unset the _end_group flag for the last line if it it set because it
1734         # is not needed and can causes problems for -lp formatting
1735         $rall_lines->[-1]->set_end_group(0);
1736
1737         # Loop over all lines ...
1738         my $jline = -1;
1739         foreach my $new_line ( @{$rall_lines} ) {
1740             $jline++;
1741
1742             # Start a new subgroup if necessary
1743             if ( !$group_line_count ) {
1744                 add_to_rgroup($jline);
1745                 if ( $new_line->get_end_group() ) {
1746                     end_rgroup(-1);
1747                 }
1748                 next;
1749             }
1750
1751             my $j_terminal_match = $new_line->get_j_terminal_match();
1752             my ( $jbeg, $jend ) = get_rgroup_jrange();
1753             if ( !defined($jbeg) ) {
1754
1755                 # safety check, shouldn't happen
1756                 warning(<<EOM);
1757 Program bug detected in Perl::Tidy::VerticalAligner sub sweep_top_down
1758 undefined index for group line count $group_line_count
1759 EOM
1760                 $jbeg = $jline;
1761             }
1762             my $base_line = $rall_lines->[$jbeg];
1763
1764             # Initialize a global flag saying if the last line of the group
1765             # should match end of group and also terminate the group.  There
1766             # should be no returns between here and where the flag is handled
1767             # at the bottom.
1768             my $col_matching_terminal = 0;
1769             if ( defined($j_terminal_match) ) {
1770
1771                 # remember the column of the terminal ? or { to match with
1772                 $col_matching_terminal =
1773                   $base_line->get_column($j_terminal_match);
1774
1775                 # Ignore an undefined value as a defensive step; shouldn't
1776                 # normally happen.
1777                 $col_matching_terminal = 0
1778                   unless defined($col_matching_terminal);
1779             }
1780
1781             # -------------------------------------------------------------
1782             # Allow hanging side comment to join current group, if any.  The
1783             # only advantage is to keep the other tokens in the same group. For
1784             # example, this would make the '=' align here:
1785             #  $ax         = 1;           # side comment
1786             #                             # hanging side comment
1787             #  $boondoggle = 5;           # side comment
1788             #  $beetle     = 5;           # side comment
1789
1790             # here is another example..
1791
1792             #  _rtoc_name_count   => {},                   # hash to track ..
1793             #  _rpackage_stack    => [],                   # stack to check ..
1794             #                                              # name changes
1795             #  _rlast_level       => \$last_level,         # brace indentation
1796             #
1797             #
1798             # If this were not desired, the next step could be skipped.
1799             # -------------------------------------------------------------
1800             if ( $new_line->get_is_hanging_side_comment() ) {
1801                 join_hanging_comment( $new_line, $base_line );
1802             }
1803
1804             # If this line has no matching tokens, then flush out the lines
1805             # BEFORE this line unless both it and the previous line have side
1806             # comments.  This prevents this line from pushing side comments out
1807             # to the right.
1808             elsif ( $new_line->get_jmax() == 1 ) {
1809
1810                 # There are no matching tokens, so now check side comments.
1811                 # Programming note: accessing arrays with index -1 is
1812                 # risky in Perl, but we have verified there is at least one
1813                 # line in the group and that there is at least one field.
1814                 my $prev_comment =
1815                   $rall_lines->[ $jline - 1 ]->get_rfields()->[-1];
1816                 my $side_comment = $new_line->get_rfields()->[-1];
1817                 end_rgroup(-1) unless ( $side_comment && $prev_comment );
1818             }
1819
1820             # See if the new line matches and fits the current group,
1821             # if it still exists. Flush the current group if not.
1822             my $match_code;
1823             if ($group_line_count) {
1824                 ( $match_code, my $imax_align ) =
1825                   $self->check_match( $new_line, $base_line,
1826                     $rall_lines->[ $jline - 1 ] );
1827                 if ( $match_code != 2 ) { end_rgroup($imax_align) }
1828             }
1829
1830             # Store the new line
1831             add_to_rgroup($jline);
1832
1833             if ( defined($j_terminal_match) ) {
1834
1835                 # Decide if we should fix a terminal match. We can either:
1836                 # 1. fix it and prevent the sweep_lr from changing it, or
1837                 # 2. leave it alone and let sweep_lr try to fix it.
1838
1839                 # The current logic is to fix it if:
1840                 # -it has not joined to previous lines,
1841                 # -and either the previous subgroup has just 1 line, or
1842                 # -this line matched but did not fit (so sweep won't work)
1843                 my $fixit;
1844                 if ( $group_line_count == 1 ) {
1845                     $fixit ||= $match_code;
1846                     if ( !$fixit ) {
1847                         if ( @{$rgroups} > 1 ) {
1848                             my ( $jbegx, $jendx ) = @{ $rgroups->[-2] };
1849                             my $nlines = $jendx - $jbegx + 1;
1850                             $fixit ||= $nlines <= 1;
1851                         }
1852                     }
1853                 }
1854
1855                 if ($fixit) {
1856                     $base_line = $new_line;
1857                     my $col_now = $base_line->get_column($j_terminal_match);
1858
1859                     # Ignore an undefined value as a defensive step; shouldn't
1860                     # normally happen.
1861                     $col_now = 0 unless defined($col_now);
1862
1863                     my $pad = $col_matching_terminal - $col_now;
1864                     my $padding_available =
1865                       $base_line->get_available_space_on_right();
1866                     if ( $col_now && $pad > 0 && $pad <= $padding_available ) {
1867                         $base_line->increase_field_width( $j_terminal_match,
1868                             $pad );
1869                     }
1870
1871                     # do not let sweep_left_to_right change an isolated 'else'
1872                     if ( !$new_line->get_is_terminal_ternary() ) {
1873                         block_penultimate_match();
1874                     }
1875                 }
1876                 end_rgroup(-1);
1877             }
1878
1879             # end the group if we know we cannot match next line.
1880             elsif ( $new_line->get_end_group() ) {
1881                 end_rgroup(-1);
1882             }
1883         } ## end loop over lines
1884
1885         end_rgroup(-1);
1886         return ($rgroups);
1887     }
1888 }
1889
1890 sub two_line_pad {
1891
1892     my ( $line_m, $line, $imax_min ) = @_;
1893
1894     # Given:
1895     #  two isolated (list) lines
1896     #  imax_min = number of common alignment tokens
1897     # Return:
1898     #  $pad_max = maximum suggested pad distance
1899     #           = 0 if alignment not recommended
1900     # Note that this is only for two lines which do not have alignment tokens
1901     # in common with any other lines.  It is intended for lists, but it might
1902     # also be used for two non-list lines with a common leading '='.
1903
1904     # Allow alignment if the difference in the two unpadded line lengths
1905     # is not more than either line length.  The idea is to avoid
1906     # aligning lines with very different field lengths, like these two:
1907
1908     #   [
1909     #       'VARCHAR', DBI::SQL_VARCHAR, undef, "'", "'", undef, 0, 1,
1910     #       1, 0, 0, 0, undef, 0, 0
1911     #   ];
1912     my $rfield_lengths   = $line->get_rfield_lengths();
1913     my $rfield_lengths_m = $line_m->get_rfield_lengths();
1914
1915     # Safety check - shouldn't happen
1916     return 0
1917       unless $imax_min < @{$rfield_lengths} && $imax_min < @{$rfield_lengths_m};
1918
1919     my $lensum_m = 0;
1920     my $lensum   = 0;
1921     foreach my $i ( 0 .. $imax_min ) {
1922         $lensum_m += $rfield_lengths_m->[$i];
1923         $lensum   += $rfield_lengths->[$i];
1924     }
1925
1926     my ( $lenmin, $lenmax ) =
1927       $lensum >= $lensum_m ? ( $lensum_m, $lensum ) : ( $lensum, $lensum_m );
1928
1929     my $patterns_match;
1930     if ( $line_m->get_list_type() && $line->get_list_type() ) {
1931         $patterns_match = 1;
1932         my $rpatterns_m = $line_m->get_rpatterns();
1933         my $rpatterns   = $line->get_rpatterns();
1934         foreach my $i ( 0 .. $imax_min ) {
1935             my $pat   = $rpatterns->[$i];
1936             my $pat_m = $rpatterns_m->[$i];
1937             if ( $pat ne $pat_m ) { $patterns_match = 0; last }
1938         }
1939     }
1940
1941     my $pad_max = $lenmax;
1942     if ( !$patterns_match && $lenmax > 2 * $lenmin ) { $pad_max = 0 }
1943
1944     return $pad_max;
1945 }
1946
1947 sub sweep_left_to_right {
1948
1949     my ( $rlines, $rgroups, $group_level ) = @_;
1950
1951     # So far we have divided the lines into groups having an equal number of
1952     # identical alignments.  Here we are going to look for common leading
1953     # alignments between the different groups and align them when possible.
1954     # For example, the three lines below are in three groups because each line
1955     # has a different number of commas.  In this routine we will sweep from
1956     # left to right, aligning the leading commas as we go, but stopping if we
1957     # hit the line length limit.
1958
1959     #  my ( $num, $numi, $numj,  $xyza, $ka,   $xyzb, $kb, $aff, $error );
1960     #  my ( $i,   $j,    $error, $aff,  $asum, $avec );
1961     #  my ( $km,  $area, $varea );
1962
1963     # nothing to do if just one group
1964     my $ng_max = @{$rgroups} - 1;
1965     return unless ( $ng_max > 0 );
1966
1967     ############################################################################
1968     # Step 1: Loop over groups to find all common leading alignment tokens
1969     ############################################################################
1970
1971     my $line;
1972     my $rtokens;
1973     my $imax;     # index of maximum non-side-comment alignment token
1974     my $istop;    # an optional stopping index
1975     my $jbeg;     # starting line index
1976     my $jend;     # ending line index
1977
1978     my $line_m;
1979     my $rtokens_m;
1980     my $imax_m;
1981     my $istop_m;
1982     my $jbeg_m;
1983     my $jend_m;
1984
1985     my $istop_mm;
1986
1987     # Look at neighboring pairs of groups and form a simple list
1988     # of all common leading alignment tokens. Foreach such match we
1989     # store [$i, $ng], where
1990     #  $i = index of the token in the line (0,1,...)
1991     #  $ng is the second of the two groups with this common token
1992     my @icommon;
1993
1994     # Hash to hold the maximum alignment change for any group
1995     my %max_move;
1996
1997     # a small number of columns
1998     my $short_pad = 4;
1999
2000     my $ng = -1;
2001     foreach my $item ( @{$rgroups} ) {
2002         $ng++;
2003
2004         $istop_mm = $istop_m;
2005
2006         # save _m values of previous group
2007         $line_m    = $line;
2008         $rtokens_m = $rtokens;
2009         $imax_m    = $imax;
2010         $istop_m   = $istop;
2011         $jbeg_m    = $jbeg;
2012         $jend_m    = $jend;
2013
2014         # Get values for this group. Note that we just have to use values for
2015         # one of the lines of the group since all members have the same
2016         # alignments.
2017         ( $jbeg, $jend, $istop ) = @{$item};
2018
2019         $line    = $rlines->[$jbeg];
2020         $rtokens = $line->get_rtokens();
2021         $imax    = $line->get_jmax() - 2;
2022         $istop   = -1 unless ( defined($istop) );
2023         $istop   = $imax if ( $istop > $imax );
2024
2025         # Initialize on first group
2026         next if ( $ng == 0 );
2027
2028         # Use the minimum index limit of the two groups
2029         my $imax_min = $imax > $imax_m ? $imax_m : $imax;
2030
2031         # Also impose a limit if given.
2032         if ( $istop_m < $imax_min ) {
2033             $imax_min = $istop_m;
2034         }
2035
2036         # Special treatment of two one-line groups isolated from other lines,
2037         # unless they form a simple list or a terminal match.  Otherwise the
2038         # alignment can look strange in some cases.
2039         my $list_type = $rlines->[$jbeg]->get_list_type();
2040         if (
2041                $jend == $jbeg
2042             && $jend_m == $jbeg_m
2043             && ( $ng == 1 || $istop_mm < 0 )
2044             && ( $ng == $ng_max || $istop < 0 )
2045             && !$line->get_j_terminal_match()
2046
2047             # Only do this for imperfect matches. This is normally true except
2048             # when two perfect matches cannot form a group because the line
2049             # length limit would be exceeded. In that case we can still try
2050             # to match as many alignments as possible.
2051             && ( $imax != $imax_m || $istop_m != $imax_m )
2052           )
2053         {
2054
2055             # We will just align assignments and simple lists
2056             next unless ( $imax_min >= 0 );
2057             next
2058               unless ( $rtokens->[0] =~ /^=\d/
2059                 || $list_type );
2060
2061             # In this case we will limit padding to a short distance.  This
2062             # is a compromise to keep some vertical alignment but prevent large
2063             # gaps, which do not look good for just two lines.
2064             my $pad_max =
2065               two_line_pad( $rlines->[$jbeg], $rlines->[$jbeg_m], $imax_min );
2066             next unless ($pad_max);
2067             my $ng_m = $ng - 1;
2068             $max_move{"$ng_m"} = $pad_max;
2069             $max_move{"$ng"}   = $pad_max;
2070         }
2071
2072         # Loop to find all common leading tokens.
2073         if ( $imax_min >= 0 ) {
2074             foreach my $i ( 0 .. $imax_min ) {
2075                 my $tok   = $rtokens->[$i];
2076                 my $tok_m = $rtokens_m->[$i];
2077                 last if ( $tok ne $tok_m );
2078                 push @icommon, [ $i, $ng, $tok ];
2079             }
2080         }
2081     }
2082     return unless @icommon;
2083
2084     ###########################################################
2085     # Step 2: Reorder and consolidate the list into a task list
2086     ###########################################################
2087
2088     # We have to work first from lowest token index to highest, then by group,
2089     # sort our list first on token index then group number
2090     @icommon = sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @icommon;
2091
2092     # Make a task list of the form
2093     #   [$i, ng_beg, $ng_end, $tok], ..
2094     # where
2095     #   $i is the index of the token to be aligned
2096     #   $ng_beg..$ng_end is the group range for this action
2097     my @todo;
2098     my ( $i, $ng_end, $tok );
2099     foreach my $item (@icommon) {
2100         my $ng_last = $ng_end;
2101         my $i_last  = $i;
2102         ( $i, $ng_end, $tok ) = @{$item};
2103         my $ng_beg = $ng_end - 1;
2104         if ( defined($ng_last) && $ng_beg == $ng_last && $i == $i_last ) {
2105             my $var = pop(@todo);
2106             $ng_beg = $var->[1];
2107         }
2108         my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
2109         push @todo, [ $i, $ng_beg, $ng_end, $raw_tok, $lev ];
2110     }
2111
2112     ###############################
2113     # Step 3: Execute the task list
2114     ###############################
2115     do_left_to_right_sweep( $rlines, $rgroups, \@todo, \%max_move, $short_pad,
2116         $group_level );
2117     return;
2118 }
2119
2120 {    ## closure for sub do_left_to_right_sweep
2121
2122     my %is_good_alignment_token;
2123
2124     BEGIN {
2125
2126         # One of the most difficult aspects of vertical alignment is knowing
2127         # when not to align.  Alignment can go from looking very nice to very
2128         # bad when overdone.  In the sweep algorithm there are two special
2129         # cases where we may need to limit padding to a '$short_pad' distance
2130         # to avoid some very ugly formatting:
2131
2132         # 1. Two isolated lines with partial alignment
2133         # 2. A 'tail-wag-dog' situation, in which a single terminal
2134         #    line with partial alignment could cause a significant pad
2135         #    increase in many previous lines if allowed to join the alignment.
2136
2137         # For most alignment tokens, we will allow only a small pad to be
2138         # introduced (the hardwired $short_pad variable) . But for some 'good'
2139         # alignments we can be less restrictive.
2140
2141         # These are 'good' alignments, which are allowed more padding:
2142         my @q = qw(
2143           => = ? if unless or || {
2144         );
2145         push @q, ',';
2146         @is_good_alignment_token{@q} = (0) x scalar(@q);
2147
2148         # Promote a few of these to 'best', with essentially no pad limit:
2149         $is_good_alignment_token{'='}      = 1;
2150         $is_good_alignment_token{'if'}     = 1;
2151         $is_good_alignment_token{'unless'} = 1;
2152         $is_good_alignment_token{'=>'}     = 1
2153
2154           # Note the hash values are set so that:
2155           #         if ($is_good_alignment_token{$raw_tok}) => best
2156           # if defined ($is_good_alignment_token{$raw_tok}) => good or best
2157
2158     }
2159
2160     sub do_left_to_right_sweep {
2161         my ( $rlines, $rgroups, $rtodo, $rmax_move, $short_pad, $group_level )
2162           = @_;
2163
2164         # $blocking_level[$nj is the level at a match failure between groups
2165         # $ng-1 and $ng
2166         my @blocking_level;
2167         my $group_list_type = $rlines->[0]->get_list_type();
2168
2169         my $move_to_common_column = sub {
2170
2171             # Move the alignment column of token $itok to $col_want for a
2172             # sequence of groups.
2173             my ( $ngb, $nge, $itok, $col_want, $raw_tok ) = @_;
2174             return unless ( defined($ngb) && $nge > $ngb );
2175             foreach my $ng ( $ngb .. $nge ) {
2176
2177                 my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
2178                 my $line  = $rlines->[$jbeg];
2179                 my $col   = $line->get_column($itok);
2180                 my $avail = $line->get_available_space_on_right();
2181                 my $move  = $col_want - $col;
2182                 if ( $move > 0 ) {
2183
2184                     # limit padding increase in isolated two lines
2185                     next
2186                       if ( defined( $rmax_move->{$ng} )
2187                         && $move > $rmax_move->{$ng}
2188                         && !$is_good_alignment_token{$raw_tok} );
2189
2190                     $line->increase_field_width( $itok, $move );
2191                 }
2192                 elsif ( $move < 0 ) {
2193
2194                     # spot to take special action on failure to move
2195                 }
2196             }
2197             return;
2198         };
2199
2200         foreach my $task ( @{$rtodo} ) {
2201             my ( $itok, $ng_beg, $ng_end, $raw_tok, $lev ) = @{$task};
2202
2203             # Nothing to do for a single group
2204             next unless ( $ng_end > $ng_beg );
2205
2206             my $ng_first;  # index of the first group of a continuous sequence
2207             my $col_want;  # the common alignment column of a sequence of groups
2208             my $col_limit; # maximum column before bumping into max line length
2209             my $line_count_ng_m = 0;
2210             my $jmax_m;
2211             my $it_stop_m;
2212
2213             # Loop over the groups
2214             # 'ix_' = index in the array of lines
2215             # 'ng_' = index in the array of groups
2216             # 'it_' = index in the array of tokens
2217             my $ix_min      = $rgroups->[$ng_beg]->[0];
2218             my $ix_max      = $rgroups->[$ng_end]->[1];
2219             my $lines_total = $ix_max - $ix_min + 1;
2220             foreach my $ng ( $ng_beg .. $ng_end ) {
2221                 my ( $ix_beg, $ix_end, $it_stop ) = @{ $rgroups->[$ng] };
2222                 my $line_count_ng = $ix_end - $ix_beg + 1;
2223
2224                 # Important: note that since all lines in a group have a common
2225                 # alignments object, we just have to work on one of the lines
2226                 # (the first line).  All of the rest will be changed
2227                 # automatically.
2228                 my $line = $rlines->[$ix_beg];
2229                 my $jmax = $line->get_jmax();
2230
2231                 # the maximum space without exceeding the line length:
2232                 my $avail   = $line->get_available_space_on_right();
2233                 my $col     = $line->get_column($itok);
2234                 my $col_max = $col + $avail;
2235
2236                 # Initialize on first group
2237                 if ( !defined($col_want) ) {
2238                     $ng_first        = $ng;
2239                     $col_want        = $col;
2240                     $col_limit       = $col_max;
2241                     $line_count_ng_m = $line_count_ng;
2242                     $jmax_m          = $jmax;
2243                     $it_stop_m       = $it_stop;
2244                     next;
2245                 }
2246
2247                 # RULE: Throw a blocking flag upon encountering a token level
2248                 # different from the level of the first blocking token.  For
2249                 # example, in the following example, if the = matches get
2250                 # blocked between two groups as shown, then we want to start
2251                 # blocking matches at the commas, which are at deeper level, so
2252                 # that we do not get the big gaps shown here:
2253
2254                 #  my $unknown3 = pack( "v",          -2 );
2255                 #  my $unknown4 = pack( "v",          0x09 );
2256                 #  my $unknown5 = pack( "VVV",        0x06, 0x00, 0x00 );
2257                 #  my $num_bbd_blocks  = pack( "V",   $num_lists );
2258                 #  my $root_startblock = pack( "V",   $root_start );
2259                 #  my $unknown6        = pack( "VV",  0x00, 0x1000 );
2260
2261                 # On the other hand, it is okay to keep matching at the same
2262                 # level such as in a simple list of commas and/or fat commas.
2263
2264                 my $is_blocked = defined( $blocking_level[$ng] )
2265                   && $lev > $blocking_level[$ng];
2266
2267                 # TAIL-WAG-DOG RULE: prevent a 'tail-wag-dog' syndrom, meaning:
2268                 # Do not let one or two lines with a **different number of
2269                 # alignments** open up a big gap in a large block.  For
2270                 # example, we will prevent something like this, where the first
2271                 # line pries open the rest:
2272
2273             #  $worksheet->write( "B7", "http://www.perl.com", undef, $format );
2274             #  $worksheet->write( "C7", "",                    $format );
2275             #  $worksheet->write( "D7", "",                    $format );
2276             #  $worksheet->write( "D8", "",                    $format );
2277             #  $worksheet->write( "D8", "",                    $format );
2278
2279                 # We should exclude from consideration two groups which are
2280                 # effectively the same but separated because one does not
2281                 # fit in the maximum allowed line length.
2282                 my $is_same_group =
2283                   $jmax == $jmax_m && $it_stop_m == $jmax_m - 2;
2284
2285                 my $lines_above = $ix_beg - $ix_min;
2286                 my $lines_below = $lines_total - $lines_above;
2287
2288                 # Increase the tolerable gap for certain favorable factors
2289                 my $factor    = 1;
2290                 my $top_level = $lev == $group_level;
2291
2292                 # Align best top level alignment tokens like '=', 'if', ...
2293                 # A factor of 10 allows a gap of up to 40 spaces
2294                 if ( $top_level && $is_good_alignment_token{$raw_tok} ) {
2295                     $factor = 10;
2296                 }
2297
2298                 # Otherwise allow some minimal padding of good alignments
2299                 elsif (
2300
2301                     defined( $is_good_alignment_token{$raw_tok} )
2302
2303                     # We have to be careful if there are just 2 lines.  This
2304                     # two-line factor allows large gaps only for 2 lines which
2305                     # are simple lists with fewer items on the second line. It
2306                     # gives results similar to previous versions of perltidy.
2307                     && (   $lines_total > 2
2308                         || $group_list_type && $jmax < $jmax_m && $top_level )
2309                   )
2310                 {
2311                     $factor += 1;
2312                     if ($top_level) {
2313                         $factor += 1;
2314                     }
2315                 }
2316
2317                 my $is_big_gap;
2318                 if ( !$is_same_group ) {
2319                     $is_big_gap ||=
2320                       (      $lines_above == 1
2321                           || $lines_above == 2 && $lines_below >= 4 )
2322                       && $col_want > $col + $short_pad * $factor;
2323                     $is_big_gap ||=
2324                       (      $lines_below == 1
2325                           || $lines_below == 2 && $lines_above >= 4 )
2326                       && $col > $col_want + $short_pad * $factor;
2327                 }
2328
2329                 # if match is limited by gap size, stop aligning at this level
2330                 if ($is_big_gap) {
2331                     $blocking_level[$ng] = $lev - 1;
2332                 }
2333
2334                 # quit and restart if it cannot join this batch
2335                 if (   $col_want > $col_max
2336                     || $col > $col_limit
2337                     || $is_big_gap
2338                     || $is_blocked )
2339                 {
2340
2341                     # remember the level of the first blocking token
2342                     if ( !defined( $blocking_level[$ng] ) ) {
2343                         $blocking_level[$ng] = $lev;
2344                     }
2345
2346                     $move_to_common_column->(
2347                         $ng_first, $ng - 1, $itok, $col_want, $raw_tok
2348                     );
2349                     $ng_first        = $ng;
2350                     $col_want        = $col;
2351                     $col_limit       = $col_max;
2352                     $line_count_ng_m = $line_count_ng;
2353                     $jmax_m          = $jmax;
2354                     $it_stop_m       = $it_stop;
2355                     next;
2356                 }
2357
2358                 $line_count_ng_m += $line_count_ng;
2359
2360                 # update the common column and limit
2361                 if ( $col > $col_want )      { $col_want  = $col }
2362                 if ( $col_max < $col_limit ) { $col_limit = $col_max }
2363
2364             } ## end loop over groups
2365
2366             if ( $ng_end > $ng_first ) {
2367                 $move_to_common_column->(
2368                     $ng_first, $ng_end, $itok, $col_want, $raw_tok
2369                 );
2370             } ## end loop over groups for one task
2371         } ## end loop over tasks
2372
2373         return;
2374     }
2375 }
2376
2377 sub delete_selected_tokens {
2378
2379     my ( $line_obj, $ridel ) = @_;
2380
2381     # $line_obj    is the line to be modified
2382     # $ridel       is a ref to list of indexes to be deleted
2383
2384     # remove an unused alignment token(s) to improve alignment chances
2385
2386     return unless ( defined($line_obj) && defined($ridel) && @{$ridel} );
2387
2388     my $jmax_old           = $line_obj->get_jmax();
2389     my $rfields_old        = $line_obj->get_rfields();
2390     my $rfield_lengths_old = $line_obj->get_rfield_lengths();
2391     my $rpatterns_old      = $line_obj->get_rpatterns();
2392     my $rtokens_old        = $line_obj->get_rtokens();
2393     my $j_terminal_match   = $line_obj->get_j_terminal_match();
2394
2395     use constant EXPLAIN_DELETE_SELECTED => 0;
2396
2397     local $LIST_SEPARATOR = '> <';
2398     EXPLAIN_DELETE_SELECTED && print <<EOM;
2399 delete indexes: <@{$ridel}>
2400 old jmax: $jmax_old
2401 old tokens: <@{$rtokens_old}>
2402 old patterns: <@{$rpatterns_old}>
2403 old fields: <@{$rfields_old}>
2404 old field_lengths: <@{$rfield_lengths_old}>
2405 EOM
2406
2407     my $rfields_new        = [];
2408     my $rpatterns_new      = [];
2409     my $rtokens_new        = [];
2410     my $rfield_lengths_new = [];
2411
2412     # Convert deletion list to a hash to allow any order, multiple entries,
2413     # and avoid problems with index values out of range
2414     my %delete_me;
2415     @delete_me{ @{$ridel} } = (1) x scalar( @{$ridel} );
2416
2417     my $pattern_0      = $rpatterns_old->[0];
2418     my $field_0        = $rfields_old->[0];
2419     my $field_length_0 = $rfield_lengths_old->[0];
2420     push @{$rfields_new},        $field_0;
2421     push @{$rfield_lengths_new}, $field_length_0;
2422     push @{$rpatterns_new},      $pattern_0;
2423
2424     # Loop to either copy items or concatenate fields and patterns
2425     my $jmin_del;
2426     foreach my $j ( 0 .. $jmax_old - 1 ) {
2427         my $token        = $rtokens_old->[$j];
2428         my $field        = $rfields_old->[ $j + 1 ];
2429         my $field_length = $rfield_lengths_old->[ $j + 1 ];
2430         my $pattern      = $rpatterns_old->[ $j + 1 ];
2431         if ( !$delete_me{$j} ) {
2432             push @{$rtokens_new},        $token;
2433             push @{$rfields_new},        $field;
2434             push @{$rpatterns_new},      $pattern;
2435             push @{$rfield_lengths_new}, $field_length;
2436         }
2437         else {
2438             if ( !defined($jmin_del) ) { $jmin_del = $j }
2439             $rfields_new->[-1] .= $field;
2440             $rfield_lengths_new->[-1] += $field_length;
2441             $rpatterns_new->[-1] .= $pattern;
2442         }
2443     }
2444
2445     # ----- x ------ x ------ x ------
2446     #t      0        1        2        <- token indexing
2447     #f   0      1        2        3    <- field and pattern
2448
2449     my $jmax_new = @{$rfields_new} - 1;
2450     $line_obj->set_rtokens($rtokens_new);
2451     $line_obj->set_rpatterns($rpatterns_new);
2452     $line_obj->set_rfields($rfields_new);
2453     $line_obj->set_rfield_lengths($rfield_lengths_new);
2454     $line_obj->set_jmax($jmax_new);
2455
2456     # The value of j_terminal_match will be incorrect if we delete tokens prior
2457     # to it. We will have to give up on aligning the terminal tokens if this
2458     # happens.
2459     if ( defined($j_terminal_match) && $jmin_del <= $j_terminal_match ) {
2460         $line_obj->set_j_terminal_match(undef);
2461     }
2462
2463     # update list type -
2464     if ( $line_obj->get_list_seqno() ) {
2465
2466         ## This works, but for efficiency see if we need to make a change:
2467         ## decide_if_list($line_obj);
2468
2469         # An existing list will still be a list but with possibly different
2470         # leading token
2471         my $old_list_type = $line_obj->get_list_type();
2472         my $new_list_type = EMPTY_STRING;
2473         if ( $rtokens_new->[0] =~ /^(=>|,)/ ) {
2474             $new_list_type = $rtokens_new->[0];
2475         }
2476         if ( !$old_list_type || $old_list_type ne $new_list_type ) {
2477             decide_if_list($line_obj);
2478         }
2479     }
2480
2481     EXPLAIN_DELETE_SELECTED && print <<EOM;
2482
2483 new jmax: $jmax_new
2484 new tokens: <@{$rtokens_new}>
2485 new patterns: <@{$rpatterns_new}>
2486 new fields: <@{$rfields_new}>
2487 EOM
2488     return;
2489 }
2490
2491 {    ## closure for sub decode_alignment_token
2492
2493     # This routine is called repeatedly for each token, so it needs to be
2494     # efficient.  We can speed things up by remembering the inputs and outputs
2495     # in a hash.
2496     my %decoded_token;
2497
2498     sub initialize_decode {
2499
2500         # We will re-initialize the hash for each file. Otherwise, there is
2501         # a danger that the hash can become arbitrarily large if a very large
2502         # number of files is processed at once.
2503         %decoded_token = ();
2504         return;
2505     }
2506
2507     sub decode_alignment_token {
2508
2509         # Unpack the values packed in an alignment token
2510         #
2511         # Usage:
2512         #        my ( $raw_tok, $lev, $tag, $tok_count ) =
2513         #          decode_alignment_token($token);
2514
2515         # Alignment tokens have a trailing decimal level and optional tag (for
2516         # commas):
2517         # For example, the first comma in the following line
2518         #     sub banner  { crlf; report( shift, '/', shift ); crlf }
2519         # is decorated as follows:
2520         #    ,2+report-6  => (tok,lev,tag) =qw( ,   2   +report-6)
2521
2522         # An optional token count may be appended with a leading dot.
2523         # Currently this is only done for '=' tokens but this could change.
2524         # For example, consider the following line:
2525         #   $nport   = $port = shift || $name;
2526         # The first '=' may either be '=0' or '=0.1' [level 0, first equals]
2527         # The second '=' will be '=0.2' [level 0, second equals]
2528         my ($tok) = @_;
2529
2530         if ( defined( $decoded_token{$tok} ) ) {
2531             return @{ $decoded_token{$tok} };
2532         }
2533
2534         my ( $raw_tok, $lev, $tag, $tok_count ) = ( $tok, 0, EMPTY_STRING, 1 );
2535         if ( $tok =~ /^(\D+)(\d+)([^\.]*)(\.(\d+))?$/ ) {
2536             $raw_tok   = $1;
2537             $lev       = $2;
2538             $tag       = $3 if ($3);
2539             $tok_count = $5 if ($5);
2540         }
2541         my @vals = ( $raw_tok, $lev, $tag, $tok_count );
2542         $decoded_token{$tok} = \@vals;
2543         return @vals;
2544     }
2545 }
2546
2547 {    ## closure for sub delete_unmatched_tokens
2548
2549     my %is_assignment;
2550     my %keep_after_deleted_assignment;
2551
2552     BEGIN {
2553         my @q;
2554
2555         @q = qw(
2556           = **= += *= &= <<= &&=
2557           -= /= |= >>= ||= //=
2558           .= %= ^=
2559           x=
2560         );
2561         @is_assignment{@q} = (1) x scalar(@q);
2562
2563         # These tokens may be kept following an = deletion
2564         @q = qw(
2565           if unless or ||
2566         );
2567         @keep_after_deleted_assignment{@q} = (1) x scalar(@q);
2568
2569     }
2570
2571     # This flag is for testing only and should normally be zero.
2572     use constant TEST_DELETE_NULL => 0;
2573
2574     sub delete_unmatched_tokens {
2575         my ( $rlines, $group_level ) = @_;
2576
2577         # This is a preliminary step in vertical alignment in which we remove
2578         # as many obviously un-needed alignment tokens as possible.  This will
2579         # prevent them from interfering with the final alignment.
2580
2581         # These are the return values
2582         my $max_lev_diff     = 0;    # used to avoid a call to prune_tree
2583         my $saw_side_comment = 0;    # used to avoid a call for side comments
2584
2585         # Handle no lines -- shouldn't happen
2586         return unless @{$rlines};
2587
2588         # Handle a single line
2589         if ( @{$rlines} == 1 ) {
2590             my $line   = $rlines->[0];
2591             my $jmax   = $line->get_jmax();
2592             my $length = $line->get_rfield_lengths()->[$jmax];
2593             $saw_side_comment = $length > 0;
2594             return ( $max_lev_diff, $saw_side_comment );
2595         }
2596
2597         my $has_terminal_match = $rlines->[-1]->get_j_terminal_match();
2598
2599         # ignore hanging side comments in these operations
2600         my @filtered   = grep { !$_->get_is_hanging_side_comment() } @{$rlines};
2601         my $rnew_lines = \@filtered;
2602
2603         $saw_side_comment = @filtered != @{$rlines};
2604         $max_lev_diff     = 0;
2605
2606         # nothing to do if all lines were hanging side comments
2607         my $jmax = @{$rnew_lines} - 1;
2608         return ( $max_lev_diff, $saw_side_comment ) unless ( $jmax >= 0 );
2609
2610         my @equals_info;
2611         my @line_info;
2612
2613         # create a hash of tokens for each line
2614         my $rline_hashes = [];
2615         foreach my $line ( @{$rnew_lines} ) {
2616             my $rhash     = {};
2617             my $rtokens   = $line->get_rtokens();
2618             my $rpatterns = $line->get_rpatterns();
2619             my $i         = 0;
2620             my ( $i_eq, $tok_eq, $pat_eq );
2621             my ( $lev_min, $lev_max );
2622             foreach my $tok ( @{$rtokens} ) {
2623                 my ( $raw_tok, $lev, $tag, $tok_count ) =
2624                   decode_alignment_token($tok);
2625
2626                 if ( $tok ne '#' ) {
2627                     if ( !defined($lev_min) ) {
2628                         $lev_min = $lev;
2629                         $lev_max = $lev;
2630                     }
2631                     else {
2632                         if ( $lev < $lev_min ) { $lev_min = $lev }
2633                         if ( $lev > $lev_max ) { $lev_max = $lev }
2634                     }
2635                 }
2636                 else {
2637                     if ( !$saw_side_comment ) {
2638                         my $length = $line->get_rfield_lengths()->[ $i + 1 ];
2639                         $saw_side_comment ||= $length;
2640                     }
2641                 }
2642
2643                 # Possible future upgrade: for multiple matches,
2644                 # record [$i1, $i2, ..] instead of $i
2645                 $rhash->{$tok} =
2646                   [ $i, undef, undef, $raw_tok, $lev, $tag, $tok_count ];
2647
2648                 # remember the first equals at line level
2649                 if ( !defined($i_eq) && $raw_tok eq '=' ) {
2650
2651                     if ( $lev eq $group_level ) {
2652                         $i_eq   = $i;
2653                         $tok_eq = $tok;
2654                         $pat_eq = $rpatterns->[$i];
2655                     }
2656                 }
2657                 $i++;
2658             }
2659             push @{$rline_hashes}, $rhash;
2660             push @equals_info,     [ $i_eq,    $tok_eq, $pat_eq ];
2661             push @line_info,       [ $lev_min, $lev_max ];
2662             if ( defined($lev_min) ) {
2663                 my $lev_diff = $lev_max - $lev_min;
2664                 if ( $lev_diff > $max_lev_diff ) { $max_lev_diff = $lev_diff }
2665             }
2666         }
2667
2668         # compare each line pair and record matches
2669         my $rtok_hash = {};
2670         my $nr        = 0;
2671         foreach my $jl ( 0 .. $jmax - 1 ) {
2672             my $nl = $nr;
2673             $nr = 0;
2674             my $jr      = $jl + 1;
2675             my $rhash_l = $rline_hashes->[$jl];
2676             my $rhash_r = $rline_hashes->[$jr];
2677             my $count   = 0;                      # UNUSED NOW?
2678             my $ntoks   = 0;
2679             foreach my $tok ( keys %{$rhash_l} ) {
2680                 $ntoks++;
2681                 if ( defined( $rhash_r->{$tok} ) ) {
2682                     if ( $tok ne '#' ) { $count++; }
2683                     my $il = $rhash_l->{$tok}->[0];
2684                     my $ir = $rhash_r->{$tok}->[0];
2685                     $rhash_l->{$tok}->[2] = $ir;
2686                     $rhash_r->{$tok}->[1] = $il;
2687                     if ( $tok ne '#' ) {
2688                         push @{ $rtok_hash->{$tok} }, ( $jl, $jr );
2689                         $nr++;
2690                     }
2691                 }
2692             }
2693
2694             # Set a line break if no matching tokens between these lines
2695             # (this is not strictly necessary now but does not hurt)
2696             if ( $nr == 0 && $nl > 0 ) {
2697                 $rnew_lines->[$jl]->set_end_group(1);
2698             }
2699
2700             # Also set a line break if both lines have simple equals but with
2701             # different leading characters in patterns.  This check is similar
2702             # to one in sub check_match, and will prevent sub
2703             # prune_alignment_tree from removing alignments which otherwise
2704             # should be kept. This fix is rarely needed, but it can
2705             # occasionally improve formatting.
2706             # For example:
2707             #     my $name = $this->{Name};
2708             #     $type = $this->ctype($genlooptype) if defined $genlooptype;
2709             #     my $declini = ( $asgnonly ? ""          : "\t$type *" );
2710             #     my $cast    = ( $type     ? "($type *)" : "" );
2711             # The last two lines start with 'my' and will not match the
2712             # previous line starting with $type, so we do not want
2713             # prune_alignment tree to delete their ? : alignments at a deeper
2714             # level.
2715             my ( $i_eq_l, $tok_eq_l, $pat_eq_l ) = @{ $equals_info[$jl] };
2716             my ( $i_eq_r, $tok_eq_r, $pat_eq_r ) = @{ $equals_info[$jr] };
2717             if ( defined($i_eq_l) && defined($i_eq_r) ) {
2718
2719                 # Also, do not align equals across a change in ci level
2720                 my $ci_jump = $rnew_lines->[$jl]->get_ci_level() !=
2721                   $rnew_lines->[$jr]->get_ci_level();
2722
2723                 if (
2724                        $tok_eq_l eq $tok_eq_r
2725                     && $i_eq_l == 0
2726                     && $i_eq_r == 0
2727                     && ( substr( $pat_eq_l, 0, 1 ) ne substr( $pat_eq_r, 0, 1 )
2728                         || $ci_jump )
2729                   )
2730                 {
2731                     $rnew_lines->[$jl]->set_end_group(1);
2732                 }
2733             }
2734         }
2735
2736         # find subgroups
2737         my @subgroups;
2738         push @subgroups, [ 0, $jmax ];
2739         foreach my $jl ( 0 .. $jmax - 1 ) {
2740             if ( $rnew_lines->[$jl]->get_end_group() ) {
2741                 $subgroups[-1]->[1] = $jl;
2742                 push @subgroups, [ $jl + 1, $jmax ];
2743             }
2744         }
2745
2746         # flag to allow skipping pass 2
2747         my $saw_large_group;
2748
2749         ############################################################
2750         # PASS 1 over subgroups to remove unmatched alignment tokens
2751         ############################################################
2752         foreach my $item (@subgroups) {
2753             my ( $jbeg, $jend ) = @{$item};
2754
2755             my $nlines = $jend - $jbeg + 1;
2756
2757             ####################################################
2758             # Look for complete if/elsif/else and ternary blocks
2759             ####################################################
2760
2761             # We are looking for a common '$dividing_token' like these:
2762
2763             #    if    ( $b and $s ) { $p->{'type'} = 'a'; }
2764             #    elsif ($b)          { $p->{'type'} = 'b'; }
2765             #    elsif ($s)          { $p->{'type'} = 's'; }
2766             #    else                { $p->{'type'} = ''; }
2767             #                        ^----------- dividing_token
2768
2769             #   my $severity =
2770             #      !$routine                     ? '[PFX]'
2771             #     : $routine =~ /warn.*_d\z/     ? '[DS]'
2772             #     : $routine =~ /ck_warn/        ? 'W'
2773             #     : $routine =~ /ckWARN\d*reg_d/ ? 'S'
2774             #     : $routine =~ /ckWARN\d*reg/   ? 'W'
2775             #     : $routine =~ /vWARN\d/        ? '[WDS]'
2776             #     :                                '[PFX]';
2777             #                                    ^----------- dividing_token
2778
2779             # Only look for groups which are more than 2 lines long.  Two lines
2780             # can get messed up doing this, probably due to the various
2781             # two-line rules.
2782
2783             my $dividing_token;
2784             my %token_line_count;
2785             if ( $nlines > 2 ) {
2786
2787                 foreach my $jj ( $jbeg .. $jend ) {
2788                     my %seen;
2789                     my $line    = $rnew_lines->[$jj];
2790                     my $rtokens = $line->get_rtokens();
2791                     foreach my $tok ( @{$rtokens} ) {
2792                         if ( !$seen{$tok} ) {
2793                             $seen{$tok}++;
2794                             $token_line_count{$tok}++;
2795                         }
2796                     }
2797                 }
2798
2799                 foreach my $tok ( keys %token_line_count ) {
2800                     if ( $token_line_count{$tok} == $nlines ) {
2801                         if (   substr( $tok, 0, 1 ) eq '?'
2802                             || substr( $tok, 0, 1 ) eq '{'
2803                             && $tok =~ /^\{\d+if/ )
2804                         {
2805                             $dividing_token = $tok;
2806                             last;
2807                         }
2808                     }
2809                 }
2810             }
2811
2812             #####################################################
2813             # Loop over lines to remove unwanted alignment tokens
2814             #####################################################
2815             foreach my $jj ( $jbeg .. $jend ) {
2816                 my $line    = $rnew_lines->[$jj];
2817                 my $rtokens = $line->get_rtokens();
2818                 my $rhash   = $rline_hashes->[$jj];
2819                 my $i_eq    = $equals_info[$jj]->[0];
2820                 my @idel;
2821                 my $imax = @{$rtokens} - 2;
2822                 my $delete_above_level;
2823                 my $deleted_assignment_token;
2824
2825                 my $saw_dividing_token = EMPTY_STRING;
2826                 $saw_large_group ||= $nlines > 2 && $imax > 1;
2827
2828                 # Loop over all alignment tokens
2829                 foreach my $i ( 0 .. $imax ) {
2830                     my $tok = $rtokens->[$i];
2831                     next if ( $tok eq '#' );    # shouldn't happen
2832                     my ( $iii, $il, $ir, $raw_tok, $lev, $tag, $tok_count ) =
2833                       @{ $rhash->{$tok} };
2834
2835                     #######################################################
2836                     # Here is the basic RULE: remove an unmatched alignment
2837                     # which does not occur in the surrounding lines.
2838                     #######################################################
2839                     my $delete_me = !defined($il) && !defined($ir);
2840
2841                     # Apply any user controls. Note that not all lines pass
2842                     # this way so they have to be applied elsewhere too.
2843                     my $align_ok = 1;
2844                     if (%valign_control_hash) {
2845                         $align_ok = $valign_control_hash{$raw_tok};
2846                         $align_ok = $valign_control_default
2847                           unless defined($align_ok);
2848                         $delete_me ||= !$align_ok;
2849                     }
2850
2851                     # But now we modify this with exceptions...
2852
2853                     # EXCEPTION 1: If we are in a complete ternary or
2854                     # if/elsif/else group, and this token is not on every line
2855                     # of the group, should we delete it to preserve overall
2856                     # alignment?
2857                     if ($dividing_token) {
2858                         if ( $token_line_count{$tok} >= $nlines ) {
2859                             $saw_dividing_token ||= $tok eq $dividing_token;
2860                         }
2861                         else {
2862
2863                             # For shorter runs, delete toks to save alignment.
2864                             # For longer runs, keep toks after the '{' or '?'
2865                             # to allow sub-alignments within braces.  The
2866                             # number 5 lines is arbitrary but seems to work ok.
2867                             $delete_me ||=
2868                               ( $nlines < 5 || !$saw_dividing_token );
2869                         }
2870                     }
2871
2872                     # EXCEPTION 2: Remove all tokens above a certain level
2873                     # following a previous deletion.  For example, we have to
2874                     # remove tagged higher level alignment tokens following a
2875                     # '=>' deletion because the tags of higher level tokens
2876                     # will now be incorrect. For example, this will prevent
2877                     # aligning commas as follows after deleting the second '=>'
2878                     #    $w->insert(
2879                     #         ListBox => origin => [ 270, 160 ],
2880                     #         size    => [ 200,           55 ],
2881                     #    );
2882                     if ( defined($delete_above_level) ) {
2883                         if ( $lev > $delete_above_level ) {
2884                             $delete_me ||= 1;    #$tag;
2885                         }
2886                         else { $delete_above_level = undef }
2887                     }
2888
2889                     # EXCEPTION 3: Remove all but certain tokens after an
2890                     # assignment deletion.
2891                     if (
2892                         $deleted_assignment_token
2893                         && ( $lev > $group_level
2894                             || !$keep_after_deleted_assignment{$raw_tok} )
2895                       )
2896                     {
2897                         $delete_me ||= 1;
2898                     }
2899
2900                     # EXCEPTION 4: Do not touch the first line of a 2 line
2901                     # terminal match, such as below, because j_terminal has
2902                     # already been set.
2903                     #    if ($tag) { $tago = "<$tag>"; $tagc = "</$tag>"; }
2904                     #    else      { $tago = $tagc = ''; }
2905                     # But see snippets 'else1.t' and 'else2.t'
2906                     $delete_me = 0
2907                       if ( $jj == $jbeg
2908                         && $has_terminal_match
2909                         && $nlines == 2 );
2910
2911                     # EXCEPTION 5: misc additional rules for commas and equals
2912                     if ($delete_me) {
2913
2914                         # okay to delete second and higher copies of a token
2915                         if ( $tok_count == 1 ) {
2916
2917                             # for a comma...
2918                             if ( $raw_tok eq ',' ) {
2919
2920                                 # Do not delete commas before an equals
2921                                 $delete_me = 0
2922                                   if ( defined($i_eq) && $i < $i_eq );
2923
2924                                 # Do not delete line-level commas
2925                                 $delete_me = 0 if ( $lev <= $group_level );
2926                             }
2927
2928                             # For an assignment at group level..
2929                             if (   $is_assignment{$raw_tok}
2930                                 && $lev == $group_level )
2931                             {
2932
2933                                 # Do not delete if it is the last alignment of
2934                                 # multiple tokens; this will prevent some
2935                                 # undesirable alignments
2936                                 if ( $imax > 0 && $i == $imax ) {
2937                                     $delete_me = 0;
2938                                 }
2939
2940                                 # Otherwise, set a flag to delete most
2941                                 # remaining tokens
2942                                 else { $deleted_assignment_token = $raw_tok }
2943                             }
2944                         }
2945                     }
2946
2947                     # Do not let a user exclusion be reactivated by above rules
2948                     $delete_me ||= !$align_ok;
2949
2950                     #####################################
2951                     # Add this token to the deletion list
2952                     #####################################
2953                     if ($delete_me) {
2954                         push @idel, $i;
2955
2956                         # update deletion propagation flags
2957                         if ( !defined($delete_above_level)
2958                             || $lev < $delete_above_level )
2959                         {
2960
2961                             # delete all following higher level alignments
2962                             $delete_above_level = $lev;
2963
2964                             # but keep deleting after => to next lower level
2965                             # to avoid some bizarre alignments
2966                             if ( $raw_tok eq '=>' ) {
2967                                 $delete_above_level = $lev - 1;
2968                             }
2969                         }
2970                     }
2971                 }    # End loop over alignment tokens
2972
2973                 # Process all deletion requests for this line
2974                 if (@idel) {
2975                     delete_selected_tokens( $line, \@idel );
2976                 }
2977             }    # End loopover lines
2978         }    # End loop over subgroups
2979
2980         #################################################
2981         # PASS 2 over subgroups to remove null alignments
2982         #################################################
2983
2984         # This pass is only used for testing. It is helping to identify
2985         # alignment situations which might be improved with a future more
2986         # general algorithm which adds a tail matching capability.
2987         if (TEST_DELETE_NULL) {
2988             delete_null_alignments( $rnew_lines, $rline_hashes, \@subgroups )
2989               if ($saw_large_group);
2990         }
2991
2992         # PASS 3: Construct a tree of matched lines and delete some small deeper
2993         # levels of tokens.  They also block good alignments.
2994         prune_alignment_tree($rnew_lines) if ($max_lev_diff);
2995
2996         # PASS 4: compare all lines for common tokens
2997         match_line_pairs( $rlines, $rnew_lines, \@subgroups, $group_level );
2998
2999         return ( $max_lev_diff, $saw_side_comment );
3000     }
3001 }
3002
3003 sub delete_null_alignments {
3004     my ( $rnew_lines, $rline_hashes, $rsubgroups ) = @_;
3005
3006     # This is an optional second pass for deleting alignment tokens which can
3007     # occasionally improve alignment.  We look for and remove 'null
3008     # alignments', which are alignments that require no padding.  So we can
3009     # 'cheat' and delete them. For example, notice the '=~' alignment in the
3010     # first two lines of the following code:
3011
3012     #    $sysname .= 'del' if $self->label =~ /deletion/;
3013     #    $sysname .= 'ins' if $self->label =~ /insertion/;
3014     #    $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq;
3015
3016     # These '=~' tokens are already aligned because they are both the same
3017     # distance from the previous alignment token, the 'if'.  So we can
3018     # eliminate them as alignments.  The advantage is that in some cases, such
3019     # as this one, this will allow other tokens to be aligned. In this case we
3020     # then get the 'if' tokens to align:
3021
3022     #   $sysname .= 'del'                     if $self->label =~ /deletion/;
3023     #   $sysname .= 'ins'                     if $self->label =~ /insertion/;
3024     #   $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq;
3025
3026     # The following rules for limiting this operation have been found to
3027     # work well and avoid problems:
3028
3029     # Rule 1. We only consider a sequence of lines which have the same
3030     # sequence of alignment tokens.
3031
3032     # Rule 2. We never eliminate the first alignment token.  One reason is that
3033     # lines may have different leading indentation spaces, so keeping the
3034     # first alignment token insures that our length measurements start at
3035     # a well-defined point.  Another reason is that nothing is gained because
3036     # the left-to-right sweep can always handle alignment of this token.
3037
3038     # Rule 3. We require that the first alignment token exist in either
3039     # a previous line or a subsequent line.  The reason is that this avoids
3040     # changing two-line matches which go through special logic.
3041
3042     # Rule 4. Do not delete a token which occurs in a previous or subsequent
3043     # line. For example, in the above example, it was ok to eliminate the '=~'
3044     # token from two lines because it did not occur in a surrounding line.
3045     # If it did occur in a surrounding line, the result could be confusing
3046     # or even incorrectly aligned.
3047
3048     # A consequence of these rules is that we only need to consider subgroups
3049     # with at least 3 lines and 2 alignment tokens.
3050
3051     # The subgroup line index range
3052     my ( $jbeg, $jend );
3053
3054     # Vars to keep track of the start of a current sequence of matching
3055     # lines.
3056     my $rtokens_match;
3057     my $rfield_lengths_match;
3058     my $j_match_beg;
3059     my $j_match_end;
3060     my $imax_match;
3061     my $rneed_pad;
3062
3063     # Vars for a line being tested
3064     my $rtokens;
3065     my $rfield_lengths;
3066     my $imax;
3067
3068     my $start_match = sub {
3069         my ($jj) = @_;
3070         $rtokens_match        = $rtokens;
3071         $rfield_lengths_match = $rfield_lengths;
3072         $j_match_beg          = $jj;
3073         $j_match_end          = $jj;
3074         $imax_match           = $imax;
3075         $rneed_pad            = [];
3076         return;
3077     };
3078
3079     my $add_to_match = sub {
3080         my ($jj) = @_;
3081         $j_match_end = $jj;
3082
3083         # Keep track of any padding that would be needed for each token
3084         foreach my $i ( 0 .. $imax ) {
3085             next if ( $rneed_pad->[$i] );
3086             my $length       = $rfield_lengths->[$i];
3087             my $length_match = $rfield_lengths_match->[$i];
3088             if ( $length ne $length_match ) { $rneed_pad->[$i] = 1 }
3089         }
3090         return;
3091     };
3092
3093     my $end_match = sub {
3094         return unless ( $j_match_end > $j_match_beg );
3095         my $nlines    = $j_match_end - $j_match_beg + 1;
3096         my $rhash_beg = $rline_hashes->[$j_match_beg];
3097         my $rhash_end = $rline_hashes->[$j_match_end];
3098         my @idel;
3099
3100         # Do not delete unless the first token also occurs in a surrounding line
3101         my $tok0 = $rtokens_match->[0];
3102         return
3103           unless (
3104             (
3105                    $j_match_beg > $jbeg
3106                 && $rnew_lines->[ $j_match_beg - 1 ]->get_rtokens()->[0] eq
3107                 $tok0
3108             )
3109             || (   $j_match_end < $jend
3110                 && $rnew_lines->[ $j_match_end + 1 ]->get_rtokens()->[0] eq
3111                 $tok0 )
3112           );
3113
3114         # Note that we are skipping the token at i=0
3115         foreach my $i ( 1 .. $imax_match ) {
3116
3117             # do not delete a token which requires padding to align
3118             next if ( $rneed_pad->[$i] );
3119
3120             my $tok = $rtokens_match->[$i];
3121
3122             # Do not delete a token which occurs in a surrounding line
3123             next
3124               if ( $j_match_beg > $jbeg
3125                 && defined( $rline_hashes->[ $j_match_beg - 1 ]->{$tok} ) );
3126             next
3127               if ( $j_match_end < $jend
3128                 && defined( $rline_hashes->[ $j_match_end + 1 ]->{$tok} ) );
3129
3130             # ok to delete
3131             push @idel, $i;
3132             ##print "ok to delete tok=$tok\n";
3133         }
3134         if (@idel) {
3135             foreach my $j ( $j_match_beg .. $j_match_end ) {
3136                 delete_selected_tokens( $rnew_lines->[$j], \@idel );
3137             }
3138         }
3139         return;
3140     };
3141
3142     foreach my $item ( @{$rsubgroups} ) {
3143         ( $jbeg, $jend ) = @{$item};
3144         my $nlines = $jend - $jbeg + 1;
3145         next unless ( $nlines > 2 );
3146
3147         foreach my $jj ( $jbeg .. $jend ) {
3148             my $line = $rnew_lines->[$jj];
3149             $rtokens        = $line->get_rtokens();
3150             $rfield_lengths = $line->get_rfield_lengths();
3151             $imax           = @{$rtokens} - 2;
3152
3153             # start a new match group
3154             if ( $jj == $jbeg ) {
3155                 $start_match->($jj);
3156                 next;
3157             }
3158
3159             # see if all tokens of this line match the current group
3160             my $match;
3161             if ( $imax == $imax_match ) {
3162                 foreach my $i ( 0 .. $imax ) {
3163                     my $tok       = $rtokens->[$i];
3164                     my $tok_match = $rtokens_match->[$i];
3165                     last if ( $tok ne $tok_match );
3166                 }
3167                 $match = 1;
3168             }
3169
3170             # yes, they all match
3171             if ($match) {
3172                 $add_to_match->($jj);
3173             }
3174
3175             # now, this line does not match
3176             else {
3177                 $end_match->();
3178                 $start_match->($jj);
3179             }
3180         }    # End loopover lines
3181         $end_match->();
3182     }    # End loop over subgroups
3183     return;
3184 } ## end sub delete_null_alignments
3185
3186 sub match_line_pairs {
3187     my ( $rlines, $rnew_lines, $rsubgroups, $group_level ) = @_;
3188
3189     # Compare each pair of lines and save information about common matches
3190     # $rlines     = list of lines including hanging side comments
3191     # $rnew_lines = list of lines without any hanging side comments
3192     # $rsubgroups = list of subgroups of the new lines
3193
3194     # TODO:
3195     # Maybe change: imax_pair => pair_match_info = ref to array
3196     #  = [$imax_align, $rMsg, ... ]
3197     #  This may eventually have multi-level match info
3198
3199     # Previous line vars
3200     my ( $line_m, $rtokens_m, $rpatterns_m, $rfield_lengths_m, $imax_m,
3201         $list_type_m, $ci_level_m );
3202
3203     # Current line vars
3204     my ( $line, $rtokens, $rpatterns, $rfield_lengths, $imax, $list_type,
3205         $ci_level );
3206
3207     use constant EXPLAIN_COMPARE_PATTERNS => 0;
3208
3209     my $compare_patterns = sub {
3210
3211         # helper routine to decide if patterns match well enough..
3212         # return code:
3213         #   0 = patterns match, continue
3214         #   1 = no match
3215         #   2 = no match, and lines do not match at all
3216
3217         my ( $tok, $tok_m, $pat, $pat_m, $pad ) = @_;
3218         my $GoToMsg     = EMPTY_STRING;
3219         my $return_code = 1;
3220
3221         my ( $alignment_token, $lev, $tag, $tok_count ) =
3222           decode_alignment_token($tok);
3223
3224         # We have to be very careful about aligning commas
3225         # when the pattern's don't match, because it can be
3226         # worse to create an alignment where none is needed
3227         # than to omit one.  Here's an example where the ','s
3228         # are not in named containers.  The first line below
3229         # should not match the next two:
3230         #   ( $a, $b ) = ( $b, $r );
3231         #   ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
3232         #   ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
3233         if ( $alignment_token eq ',' ) {
3234
3235             # do not align commas unless they are in named
3236             # containers
3237             $GoToMsg = "do not align commas in unnamed containers";
3238             goto NO_MATCH unless ( $tok =~ /[A-Za-z]/ );
3239         }
3240
3241         # do not align parens unless patterns match;
3242         # large ugly spaces can occur in math expressions.
3243         elsif ( $alignment_token eq '(' ) {
3244
3245             # But we can allow a match if the parens don't
3246             # require any padding.
3247             $GoToMsg = "do not align '(' unless patterns match or pad=0";
3248             if ( $pad != 0 ) { goto NO_MATCH }
3249         }
3250
3251         # Handle an '=' alignment with different patterns to
3252         # the left.
3253         elsif ( $alignment_token eq '=' ) {
3254
3255             # It is best to be a little restrictive when
3256             # aligning '=' tokens.  Here is an example of
3257             # two lines that we will not align:
3258             #       my $variable=6;
3259             #       $bb=4;
3260             # The problem is that one is a 'my' declaration,
3261             # and the other isn't, so they're not very similar.
3262             # We will filter these out by comparing the first
3263             # letter of the pattern.  This is crude, but works
3264             # well enough.
3265             if ( substr( $pat_m, 0, 1 ) ne substr( $pat, 0, 1 ) ) {
3266                 $GoToMsg = "first character before equals differ";
3267                 goto NO_MATCH;
3268             }
3269
3270             # The introduction of sub 'prune_alignment_tree'
3271             # enabled alignment of lists left of the equals with
3272             # other scalar variables. For example:
3273             # my ( $D, $s, $e ) = @_;
3274             # my $d             = length $D;
3275             # my $c             = $e - $s - $d;
3276
3277             # But this would change formatting of a lot of scripts,
3278             # so for now we prevent alignment of comma lists on the
3279             # left with scalars on the left.  We will also prevent
3280             # any partial alignments.
3281
3282             # set return code 2 if the = is at line level, but
3283             # set return code 1 if the = is below line level, i.e.
3284             #  sub new { my ( $p, $v ) = @_; bless \$v, $p }
3285             #  sub iter { my ($x) = @_; return undef if $$x < 0; return $$x--; }
3286
3287             elsif (
3288                 ( index( $pat_m, ',' ) >= 0 ) ne ( index( $pat, ',' ) >= 0 ) )
3289             {
3290                 $GoToMsg = "mixed commas/no-commas before equals";
3291                 if ( $lev eq $group_level ) {
3292                     $return_code = 2;
3293                 }
3294                 goto NO_MATCH;
3295             }
3296         }
3297
3298       MATCH:
3299         return ( 0, \$GoToMsg );
3300
3301       NO_MATCH:
3302
3303         EXPLAIN_COMPARE_PATTERNS
3304           && print STDERR "no match because $GoToMsg\n";
3305
3306         return ( $return_code, \$GoToMsg );
3307
3308     };    ## end of $compare_patterns->()
3309
3310     # loop over subgroups
3311     foreach my $item ( @{$rsubgroups} ) {
3312         my ( $jbeg, $jend ) = @{$item};
3313         my $nlines = $jend - $jbeg + 1;
3314         next unless ( $nlines > 1 );
3315
3316         # loop over lines in a subgroup
3317         foreach my $jj ( $jbeg .. $jend ) {
3318
3319             $line_m           = $line;
3320             $rtokens_m        = $rtokens;
3321             $rpatterns_m      = $rpatterns;
3322             $rfield_lengths_m = $rfield_lengths;
3323             $imax_m           = $imax;
3324             $list_type_m      = $list_type;
3325             $ci_level_m       = $ci_level;
3326
3327             $line           = $rnew_lines->[$jj];
3328             $rtokens        = $line->get_rtokens();
3329             $rpatterns      = $line->get_rpatterns();
3330             $rfield_lengths = $line->get_rfield_lengths();
3331             $imax           = @{$rtokens} - 2;
3332             $list_type      = $line->get_list_type();
3333             $ci_level       = $line->get_ci_level();
3334
3335             # nothing to do for first line
3336             next if ( $jj == $jbeg );
3337
3338             my $ci_jump = $ci_level - $ci_level_m;
3339
3340             my $imax_min = $imax_m < $imax ? $imax_m : $imax;
3341
3342             my $imax_align = -1;
3343
3344             # find number of leading common tokens
3345
3346             #################################
3347             # No match to hanging side comment
3348             #################################
3349             if ( $line->get_is_hanging_side_comment() ) {
3350
3351                 # Should not get here; HSC's have been filtered out
3352                 $imax_align = -1;
3353             }
3354
3355             ##############################
3356             # Handle comma-separated lists
3357             ##############################
3358             elsif ( $list_type && $list_type eq $list_type_m ) {
3359
3360                 # do not align lists across a ci jump with new list method
3361                 if ($ci_jump) { $imax_min = -1 }
3362
3363                 my $i_nomatch = $imax_min + 1;
3364                 foreach my $i ( 0 .. $imax_min ) {
3365                     my $tok   = $rtokens->[$i];
3366                     my $tok_m = $rtokens_m->[$i];
3367                     if ( $tok ne $tok_m ) {
3368                         $i_nomatch = $i;
3369                         last;
3370                     }
3371                 }
3372
3373                 $imax_align = $i_nomatch - 1;
3374             }
3375
3376             ##################
3377             # Handle non-lists
3378             ##################
3379             else {
3380                 my $i_nomatch = $imax_min + 1;
3381                 foreach my $i ( 0 .. $imax_min ) {
3382                     my $tok   = $rtokens->[$i];
3383                     my $tok_m = $rtokens_m->[$i];
3384                     if ( $tok ne $tok_m ) {
3385                         $i_nomatch = $i;
3386                         last;
3387                     }
3388
3389                     my $pat   = $rpatterns->[$i];
3390                     my $pat_m = $rpatterns_m->[$i];
3391
3392                     # If patterns don't match, we have to be careful...
3393                     if ( $pat_m ne $pat ) {
3394                         my $pad =
3395                           $rfield_lengths->[$i] - $rfield_lengths_m->[$i];
3396                         my ( $match_code, $rmsg ) = $compare_patterns->(
3397                             $tok, $tok_m, $pat, $pat_m, $pad
3398                         );
3399                         if ($match_code) {
3400                             if    ( $match_code == 1 ) { $i_nomatch = $i }
3401                             elsif ( $match_code == 2 ) { $i_nomatch = 0 }
3402                             last;
3403                         }
3404                     }
3405                 }
3406                 $imax_align = $i_nomatch - 1;
3407             }
3408
3409             $line_m->set_imax_pair($imax_align);
3410
3411         } ## end loop over lines
3412
3413         # Put fence at end of subgroup
3414         $line->set_imax_pair(-1);
3415
3416     } ## end loop over subgroups
3417
3418     # if there are hanging side comments, propagate the pair info down to them
3419     # so that lines can just look back one line for their pair info.
3420     if ( @{$rlines} > @{$rnew_lines} ) {
3421         my $last_pair_info = -1;
3422         foreach my $line ( @{$rlines} ) {
3423             if ( $line->get_is_hanging_side_comment() ) {
3424                 $line->set_imax_pair($last_pair_info);
3425             }
3426             else {
3427                 $last_pair_info = $line->get_imax_pair();
3428             }
3429         }
3430     }
3431     return;
3432 }
3433
3434 sub fat_comma_to_comma {
3435     my ($str) = @_;
3436
3437     # We are changing '=>' to ',' and removing any trailing decimal count
3438     # because currently fat commas have a count and commas do not.
3439     # For example, we will change '=>2+{-3.2' into ',2+{-3'
3440     if ( $str =~ /^=>([^\.]*)/ ) { $str = ',' . $1 }
3441     return $str;
3442 }
3443
3444 sub get_line_token_info {
3445
3446     # scan lines of tokens and return summary information about the range of
3447     # levels and patterns.
3448     my ($rlines) = @_;
3449
3450     # First scan to check monotonicity. Here is an example of several
3451     # lines which are monotonic. The = is the lowest level, and
3452     # the commas are all one level deeper. So this is not nonmonotonic.
3453     #  $$d{"weeks"}   = [ "w",  "wk",  "wks", "week", "weeks" ];
3454     #  $$d{"days"}    = [ "d",  "day", "days" ];
3455     #  $$d{"hours"}   = [ "h",  "hr",  "hrs", "hour", "hours" ];
3456     my @all_token_info;
3457     my $all_monotonic = 1;
3458     foreach my $jj ( 0 .. @{$rlines} - 1 ) {
3459         my ($line) = $rlines->[$jj];
3460         my $rtokens = $line->get_rtokens();
3461         my $last_lev;
3462         my $is_monotonic = 1;
3463         my $i            = -1;
3464         foreach my $tok ( @{$rtokens} ) {
3465             $i++;
3466             my ( $raw_tok, $lev, $tag, $tok_count ) =
3467               decode_alignment_token($tok);
3468             push @{ $all_token_info[$jj] },
3469               [ $raw_tok, $lev, $tag, $tok_count ];
3470             last if ( $tok eq '#' );
3471             if ( $i > 0 && $lev < $last_lev ) { $is_monotonic = 0 }
3472             $last_lev = $lev;
3473         }
3474         if ( !$is_monotonic ) { $all_monotonic = 0 }
3475     }
3476
3477     my $rline_values = [];
3478     foreach my $jj ( 0 .. @{$rlines} - 1 ) {
3479         my ($line) = $rlines->[$jj];
3480
3481         my $rtokens = $line->get_rtokens();
3482         my $i       = -1;
3483         my ( $lev_min, $lev_max );
3484         my $token_pattern_max = EMPTY_STRING;
3485         my %saw_level;
3486         my $is_monotonic = 1;
3487
3488         # find the index of the last token before the side comment
3489         my $imax      = @{$rtokens} - 2;
3490         my $imax_true = $imax;
3491
3492         # If the entire group is monotonic, and the line ends in a comma list,
3493         # walk it back to the first such comma. this will have the effect of
3494         # making all trailing ragged comma lists match in the prune tree
3495         # routine.  these trailing comma lists can better be handled by later
3496         # alignment rules.
3497
3498         # Treat fat commas the same as commas here by converting them to
3499         # commas.  This will improve the chance of aligning the leading parts
3500         # of ragged lists.
3501
3502         my $tok_end = fat_comma_to_comma( $rtokens->[$imax] );
3503         if ( $all_monotonic && $tok_end =~ /^,/ ) {
3504             my $ii = $imax - 1;
3505             while ( $ii >= 0
3506                 && fat_comma_to_comma( $rtokens->[$ii] ) eq $tok_end )
3507             {
3508                 $imax = $ii;
3509                 $ii--;
3510             }
3511         }
3512
3513         # make a first pass to find level range
3514         my $last_lev;
3515         foreach my $tok ( @{$rtokens} ) {
3516             $i++;
3517             last if ( $i > $imax );
3518             last if ( $tok eq '#' );
3519             my ( $raw_tok, $lev, $tag, $tok_count ) =
3520               @{ $all_token_info[$jj]->[$i] };
3521
3522             last if ( $tok eq '#' );
3523             $token_pattern_max .= $tok;
3524             $saw_level{$lev}++;
3525             if ( !defined($lev_min) ) {
3526                 $lev_min = $lev;
3527                 $lev_max = $lev;
3528             }
3529             else {
3530                 if ( $lev < $lev_min )  { $lev_min      = $lev; }
3531                 if ( $lev > $lev_max )  { $lev_max      = $lev; }
3532                 if ( $lev < $last_lev ) { $is_monotonic = 0 }
3533             }
3534             $last_lev = $lev;
3535         }
3536
3537         # handle no levels
3538         my $rtoken_patterns = {};
3539         my $rtoken_indexes  = {};
3540         my @levs            = sort keys %saw_level;
3541         if ( !defined($lev_min) ) {
3542             $lev_min                     = -1;
3543             $lev_max                     = -1;
3544             $levs[0]                     = -1;
3545             $rtoken_patterns->{$lev_min} = EMPTY_STRING;
3546             $rtoken_indexes->{$lev_min}  = [];
3547         }
3548
3549         # handle one level
3550         elsif ( $lev_max == $lev_min ) {
3551             $rtoken_patterns->{$lev_max} = $token_pattern_max;
3552             $rtoken_indexes->{$lev_max}  = [ ( 0 .. $imax ) ];
3553         }
3554
3555         # handle multiple levels
3556         else {
3557             $rtoken_patterns->{$lev_max} = $token_pattern_max;
3558             $rtoken_indexes->{$lev_max}  = [ ( 0 .. $imax ) ];
3559
3560             my $debug   = 0;
3561             my $lev_top = pop @levs;    # alread did max level
3562             my $itok    = -1;
3563             foreach my $tok ( @{$rtokens} ) {
3564                 $itok++;
3565                 last if ( $itok > $imax );
3566                 my ( $raw_tok, $lev, $tag, $tok_count ) =
3567                   @{ $all_token_info[$jj]->[$itok] };
3568                 last if ( $raw_tok eq '#' );
3569                 foreach my $lev_test (@levs) {
3570                     next if ( $lev > $lev_test );
3571                     $rtoken_patterns->{$lev_test} .= $tok;
3572                     push @{ $rtoken_indexes->{$lev_test} }, $itok;
3573                 }
3574             }
3575             push @levs, $lev_top;
3576         }
3577
3578         push @{$rline_values},
3579           [
3580             $lev_min,        $lev_max,      $rtoken_patterns, \@levs,
3581             $rtoken_indexes, $is_monotonic, $imax_true,       $imax,
3582           ];
3583
3584         # debug
3585         0 && do {
3586             local $LIST_SEPARATOR = ')(';
3587             print "lev_min=$lev_min, lev_max=$lev_max, levels=(@levs)\n";
3588             foreach my $key ( sort keys %{$rtoken_patterns} ) {
3589                 print "$key => $rtoken_patterns->{$key}\n";
3590                 print "$key => @{$rtoken_indexes->{$key}}\n";
3591             }
3592         };
3593     } ## end loop over lines
3594     return ( $rline_values, $all_monotonic );
3595 }
3596
3597 sub prune_alignment_tree {
3598     my ($rlines) = @_;
3599     my $jmax = @{$rlines} - 1;
3600     return unless $jmax > 0;
3601
3602     # Vertical alignment in perltidy is done as an iterative process.  The
3603     # starting point is to mark all possible alignment tokens ('=', ',', '=>',
3604     # etc) for vertical alignment.  Then we have to delete all alignments
3605     # which, if actually made, would detract from overall alignment.  This
3606     # is done in several phases of which this is one.
3607
3608     # In this routine we look at the alignments of a group of lines as a
3609     # hierarchical tree.  We will 'prune' the tree to limited depths if that
3610     # will improve overall alignment at the lower depths.
3611     # For each line we will be looking at its alignment patterns down to
3612     # different fixed depths. For each depth, we include all lower depths and
3613     # ignore all higher depths.  We want to see if we can get alignment of a
3614     # larger group of lines if we ignore alignments at some lower depth.
3615     # Here is an # example:
3616
3617     # for (
3618     #     [ '$var',     sub { join $_, "bar" },            0, "bar" ],
3619     #     [ 'CONSTANT', sub { join "foo", "bar" },         0, "bar" ],
3620     #     [ 'CONSTANT', sub { join "foo", "bar", 3 },      1, "barfoo3" ],
3621     #     [ '$myvar',   sub { my $var; join $var, "bar" }, 0, "bar" ],
3622     # );
3623
3624     # In the above example, all lines have three commas at the lowest depth
3625     # (zero), so if there were no other alignments, these lines would all
3626     # align considering only the zero depth alignment token.  But some lines
3627     # have additional comma alignments at the next depth, so we need to decide
3628     # if we should drop those to keep the top level alignments, or keep those
3629     # for some additional low level alignments at the expense losing some top
3630     # level alignments.  In this case we will drop the deeper level commas to
3631     # keep the entire collection aligned.  But in some cases the decision could
3632     # go the other way.
3633
3634     # The tree for this example at the zero depth has one node containing
3635     # all four lines, since they are identical at zero level (three commas).
3636     # At depth one, there are three 'children' nodes, namely:
3637     # - lines 1 and 2, which have a single comma in the 'sub' at depth 1
3638     # - line 3, which has 2 commas at depth 1
3639     # - line4, which has a ';' and a ',' at depth 1
3640     # There are no deeper alignments in this example.
3641     # so the tree structure for this example is:
3642     #
3643     #    depth 0         depth 1      depth 2
3644     #    [lines 1-4] --  [line 1-2] -  (empty)
3645     #                 |  [line 3]   -  (empty)
3646     #                 |  [line 4]   -  (empty)
3647
3648     # We can carry this to any depth, but it is not really useful to go below
3649     # depth 2. To cleanly stop there, we will consider depth 2 to contain all
3650     # alignments at depth >=2.
3651
3652     use constant EXPLAIN_PRUNE => 0;
3653
3654     ####################################################################
3655     # Prune Tree Step 1. Start by scanning the lines and collecting info
3656     ####################################################################
3657
3658     # Note that the caller had this info but we have to redo this now because
3659     # alignment tokens may have been deleted.
3660     my ( $rline_values, $all_monotonic ) = get_line_token_info($rlines);
3661
3662     # If all the lines have levels which increase monotonically from left to
3663     # right, then the sweep-left-to-right pass can do a better job of alignment
3664     # than pruning, and without deleting alignments.
3665     return if ($all_monotonic);
3666
3667     # Contents of $rline_values
3668     #   [
3669     #     $lev_min,        $lev_max,      $rtoken_patterns, \@levs,
3670     #     $rtoken_indexes, $is_monotonic, $imax_true,       $imax,
3671     #   ];
3672
3673     # We can work to any depth, but there is little advantage to working
3674     # to a a depth greater than 2
3675     my $MAX_DEPTH = 2;
3676
3677     # This arrays will hold the tree of alignment tokens at different depths
3678     # for these lines.
3679     my @match_tree;
3680
3681     # Tree nodes contain these values:
3682     # $match_tree[$depth] = [$jbeg, $jend, $n_parent, $level, $pattern,
3683     #                        $nc_beg_p, $nc_end_p, $rindexes];
3684     # where
3685     #      $depth = 0,1,2 = index of depth of the match
3686
3687     #  $jbeg beginning index j of the range of lines in this match
3688     #  $jend ending index j of the range of lines in this match
3689     #  $n_parent = index of the containing group at $depth-1, if it exists
3690     #  $level = actual level of code being matched in this group
3691     #  $pattern = alignment pattern being matched
3692     #  $nc_beg_p = first child
3693     #  $nc_end_p = last child
3694     #  $rindexes = ref to token indexes
3695
3696     # the patterns and levels of the current group being formed at each depth
3697     my ( @token_patterns_current, @levels_current, @token_indexes_current );
3698
3699     # the patterns and levels of the next line being tested at each depth
3700     my ( @token_patterns_next, @levels_next, @token_indexes_next );
3701
3702     #########################################################
3703     # define a recursive worker subroutine for tree construction
3704     #########################################################
3705
3706     # This is a recursive routine which is called if a match condition changes
3707     # at any depth when a new line is encountered.  It ends the match node
3708     # which changed plus all deeper nodes attached to it.
3709     my $end_node;
3710     $end_node = sub {
3711         my ( $depth, $jl, $n_parent ) = @_;
3712
3713         # $depth is the tree depth
3714         # $jl is the  index of the line
3715         # $n_parent is index of the parent node of this node
3716
3717         return if ( $depth > $MAX_DEPTH );
3718
3719         # end any current group at this depth
3720         if (   $jl >= 0
3721             && defined( $match_tree[$depth] )
3722             && @{ $match_tree[$depth] }
3723             && defined( $levels_current[$depth] ) )
3724         {
3725             $match_tree[$depth]->[-1]->[1] = $jl;
3726         }
3727
3728         # Define the index of the node we will create below
3729         my $ng_self = 0;
3730         if ( defined( $match_tree[$depth] ) ) {
3731             $ng_self = @{ $match_tree[$depth] };
3732         }
3733
3734         # end any next deeper child node(s)
3735         $end_node->( $depth + 1, $jl, $ng_self );
3736
3737         # update the levels being matched
3738         $token_patterns_current[$depth] = $token_patterns_next[$depth];
3739         $token_indexes_current[$depth]  = $token_indexes_next[$depth];
3740         $levels_current[$depth]         = $levels_next[$depth];
3741
3742         # Do not start a new group at this level if it is not being used
3743         if ( !defined( $levels_next[$depth] )
3744             || $depth > 0
3745             && $levels_next[$depth] <= $levels_next[ $depth - 1 ] )
3746         {
3747             return;
3748         }
3749
3750         # Create a node for the next group at this depth. We initially assume
3751         # that it will continue to $jmax, and correct that later if the node
3752         # ends earlier.
3753         push @{ $match_tree[$depth] },
3754           [
3755             $jl + 1, $jmax, $n_parent, $levels_current[$depth],
3756             $token_patterns_current[$depth],
3757             undef, undef, $token_indexes_current[$depth],
3758           ];
3759
3760         return;
3761     };    ## end sub end_node
3762
3763     ######################################################
3764     # Prune Tree Step 2. Loop to form the tree of matches.
3765     ######################################################
3766     foreach my $jp ( 0 .. $jmax ) {
3767
3768         # working with two adjacent line indexes, 'm'=minus, 'p'=plus
3769         my $jm = $jp - 1;
3770
3771         # Pull out needed values for the next line
3772         my ( $lev_min, $lev_max, $rtoken_patterns, $rlevs, $rtoken_indexes,
3773             $is_monotonic, $imax_true, $imax )
3774           = @{ $rline_values->[$jp] };
3775
3776         # Transfer levels and patterns for this line to the working arrays.
3777         # If the number of levels differs from our chosen MAX_DEPTH ...
3778         # if fewer than MAX_DEPTH: leave levels at missing depths undefined
3779         # if more than MAX_DEPTH: set the MAX_DEPTH level to be the maximum
3780         @levels_next = @{$rlevs}[ 0 .. $MAX_DEPTH ];
3781         if ( @{$rlevs} > $MAX_DEPTH ) {
3782             $levels_next[$MAX_DEPTH] = $rlevs->[-1];
3783         }
3784         my $depth = 0;
3785         foreach my $item (@levels_next) {
3786             $token_patterns_next[$depth] =
3787               defined($item) ? $rtoken_patterns->{$item} : undef;
3788             $token_indexes_next[$depth] =
3789               defined($item) ? $rtoken_indexes->{$item} : undef;
3790             $depth++;
3791         }
3792
3793         # Look for a change in match groups...
3794
3795         # Initialize on the first line
3796         if ( $jp == 0 ) {
3797             my $n_parent;
3798             $end_node->( 0, $jm, $n_parent );
3799         }
3800
3801         # End groups if a hard flag has been set
3802         elsif ( $rlines->[$jm]->get_end_group() ) {
3803             my $n_parent;
3804             $end_node->( 0, $jm, $n_parent );
3805         }
3806
3807         # Continue at hanging side comment
3808         elsif ( $rlines->[$jp]->get_is_hanging_side_comment() ) {
3809             next;
3810         }
3811
3812         # Otherwise see if anything changed and update the tree if so
3813         else {
3814             foreach my $depth ( 0 .. $MAX_DEPTH ) {
3815
3816                 my $def_current = defined( $token_patterns_current[$depth] );
3817                 my $def_next    = defined( $token_patterns_next[$depth] );
3818                 last unless ( $def_current || $def_next );
3819                 if (   !$def_current
3820                     || !$def_next
3821                     || $token_patterns_current[$depth] ne
3822                     $token_patterns_next[$depth] )
3823                 {
3824                     my $n_parent;
3825                     if ( $depth > 0 && defined( $match_tree[ $depth - 1 ] ) ) {
3826                         $n_parent = @{ $match_tree[ $depth - 1 ] } - 1;
3827                     }
3828                     $end_node->( $depth, $jm, $n_parent );
3829                     last;
3830                 }
3831             }
3832         }
3833     } ## end loop to form tree of matches
3834
3835     ##########################################################
3836     # Prune Tree Step 3. Make links from parent to child nodes
3837     ##########################################################
3838
3839     # It seemed cleaner to do this as a separate step rather than during tree
3840     # construction.  The children nodes have links up to the parent node which
3841     # created them.  Now make links in the opposite direction, so the parents
3842     # can find the children.  We store the range of children nodes ($nc_beg,
3843     # $nc_end) of each parent with two additional indexes in the original array.
3844     # These will be undef if no children.
3845     foreach my $depth ( reverse( 1 .. $MAX_DEPTH ) ) {
3846         next unless defined( $match_tree[$depth] );
3847         my $nc_max = @{ $match_tree[$depth] } - 1;
3848         my $np_now;
3849         foreach my $nc ( 0 .. $nc_max ) {
3850             my $np = $match_tree[$depth]->[$nc]->[2];
3851             if ( !defined($np) ) {
3852
3853                 # shouldn't happen
3854                 #print STDERR "lost child $np at depth $depth\n";
3855                 next;
3856             }
3857             if ( !defined($np_now) || $np != $np_now ) {
3858                 $np_now = $np;
3859                 $match_tree[ $depth - 1 ]->[$np]->[5] = $nc;
3860             }
3861             $match_tree[ $depth - 1 ]->[$np]->[6] = $nc;
3862         }
3863     } ## end loop to make links down to the child nodes
3864
3865     EXPLAIN_PRUNE > 0 && do {
3866         print "Tree complete. Found these groups:\n";
3867         foreach my $depth ( 0 .. $MAX_DEPTH ) {
3868             Dump_tree_groups( \@{ $match_tree[$depth] }, "depth=$depth" );
3869         }
3870     };
3871
3872     #######################################################
3873     # Prune Tree Step 4. Make a list of nodes to be deleted
3874     #######################################################
3875
3876     #  list of lines with tokens to be deleted:
3877     #  [$jbeg, $jend, $level_keep]
3878     #  $jbeg..$jend is the range of line indexes,
3879     #  $level_keep is the minimum level to keep
3880     my @delete_list;
3881
3882     # Not currently used:
3883     #  Groups with ending comma lists and their range of sizes:
3884     #  $ragged_comma_group{$id} = [ imax_group_min, imax_group_max ]
3885     ## my %ragged_comma_group;
3886
3887     # Define a threshold line count for forcing a break
3888     my $nlines_break = 3;
3889
3890     # We work with a list of nodes to visit at the next deeper depth.
3891     my @todo_list;
3892     if ( defined( $match_tree[0] ) ) {
3893         @todo_list = ( 0 .. @{ $match_tree[0] } - 1 );
3894     }
3895
3896     foreach my $depth ( 0 .. $MAX_DEPTH ) {
3897         last unless (@todo_list);
3898         my @todo_next;
3899         foreach my $np (@todo_list) {
3900             my ( $jbeg_p, $jend_p, $np_p, $lev_p, $pat_p, $nc_beg_p, $nc_end_p,
3901                 $rindexes_p )
3902               = @{ $match_tree[$depth]->[$np] };
3903             my $nlines_p = $jend_p - $jbeg_p + 1;
3904
3905             # nothing to do if no children
3906             next unless defined($nc_beg_p);
3907
3908             # Define the number of lines to either keep or delete a child node.
3909             # This is the key decision we have to make.  We want to delete
3910             # short runs of matched lines, and keep long runs.  It seems easier
3911             # for the eye to follow breaks in monotonic level changes than
3912             # non-monotonic level changes.  For example, the following looks
3913             # best if we delete the lower level alignments:
3914
3915             #  [1]                  ~~ [];
3916             #  [ ["foo"], ["bar"] ] ~~ [ qr/o/, qr/a/ ];
3917             #  [ qr/o/, qr/a/ ]     ~~ [ ["foo"], ["bar"] ];
3918             #  [ "foo", "bar" ]     ~~ [ qr/o/, qr/a/ ];
3919             #  [ qr/o/, qr/a/ ]     ~~ [ "foo", "bar" ];
3920             #  $deep1               ~~ $deep1;
3921
3922             # So we will use two thresholds.
3923             my $nmin_mono     = $depth + 2;
3924             my $nmin_non_mono = $depth + 6;
3925             if ( $nmin_mono > $nlines_p - 1 ) {
3926                 $nmin_mono = $nlines_p - 1;
3927             }
3928             if ( $nmin_non_mono > $nlines_p - 1 ) {
3929                 $nmin_non_mono = $nlines_p - 1;
3930             }
3931
3932             # loop to keep or delete each child node
3933             foreach my $nc ( $nc_beg_p .. $nc_end_p ) {
3934                 my ( $jbeg_c, $jend_c, $np_c, $lev_c, $pat_c, $nc_beg_c,
3935                     $nc_end_c )
3936                   = @{ $match_tree[ $depth + 1 ]->[$nc] };
3937                 my $nlines_c     = $jend_c - $jbeg_c + 1;
3938                 my $is_monotonic = $rline_values->[$jbeg_c]->[5];
3939                 my $nmin         = $is_monotonic ? $nmin_mono : $nmin_non_mono;
3940                 if ( $nlines_c < $nmin ) {
3941 ##print "deleting child, nlines=$nlines_c, nmin=$nmin\n";
3942                     push @delete_list, [ $jbeg_c, $jend_c, $lev_p ];
3943                 }
3944                 else {
3945 ##print "keeping child, nlines=$nlines_c, nmin=$nmin\n";
3946                     push @todo_next, $nc;
3947                 }
3948             }
3949         }
3950         @todo_list = @todo_next;
3951     } ## end loop to mark nodes to delete
3952
3953     #############################################################
3954     # Prune Tree Step 5. Loop to delete selected alignment tokens
3955     #############################################################
3956     foreach my $item (@delete_list) {
3957         my ( $jbeg, $jend, $level_keep ) = @{$item};
3958         foreach my $jj ( $jbeg .. $jend ) {
3959             my $line = $rlines->[$jj];
3960             my @idel;
3961             my $rtokens = $line->get_rtokens();
3962             my $imax    = @{$rtokens} - 2;
3963             foreach my $i ( 0 .. $imax ) {
3964                 my $tok = $rtokens->[$i];
3965                 my ( $raw_tok, $lev, $tag, $tok_count ) =
3966                   decode_alignment_token($tok);
3967                 if ( $lev > $level_keep ) {
3968                     push @idel, $i;
3969                 }
3970             }
3971             if (@idel) {
3972                 delete_selected_tokens( $line, \@idel );
3973             }
3974         }
3975     } ## end loop to delete selected alignment tokens
3976
3977     return;
3978 } ## end sub prune_alignment_tree
3979
3980 sub Dump_tree_groups {
3981     my ( $rgroup, $msg ) = @_;
3982     print "$msg\n";
3983     local $LIST_SEPARATOR = ')(';
3984     foreach my $item ( @{$rgroup} ) {
3985         my @fix = @{$item};
3986         foreach my $val (@fix) { $val = "undef" unless defined $val; }
3987         $fix[4] = "...";
3988         print "(@fix)\n";
3989     }
3990     return;
3991 }
3992
3993 {    ## closure for sub is_marginal_match
3994
3995     my %is_if_or;
3996     my %is_assignment;
3997     my %is_good_alignment;
3998
3999     # This test did not give sufficiently better results to use as an update,
4000     # but the flag is worth keeping as a starting point for future testing.
4001     use constant TEST_MARGINAL_EQ_ALIGNMENT => 0;
4002
4003     BEGIN {
4004
4005         my @q = qw(
4006           if unless or ||
4007         );
4008         @is_if_or{@q} = (1) x scalar(@q);
4009
4010         @q = qw(
4011           = **= += *= &= <<= &&=
4012           -= /= |= >>= ||= //=
4013           .= %= ^=
4014           x=
4015         );
4016         @is_assignment{@q} = (1) x scalar(@q);
4017
4018         # Vertically aligning on certain "good" tokens is usually okay
4019         # so we can be less restrictive in marginal cases.
4020         @q = qw( { ? => = );
4021         push @q, (',');
4022         @is_good_alignment{@q} = (1) x scalar(@q);
4023     }
4024
4025     sub is_marginal_match {
4026
4027         my ( $line_0, $line_1, $group_level, $imax_align, $imax_prev ) = @_;
4028
4029         # Decide if we should undo some or all of the common alignments of a
4030         # group of just two lines.
4031
4032         # Given:
4033         #   $line_0 and $line_1 - the two lines
4034         #   $group_level = the indentation level of the group being processed
4035         #   $imax_align = the maximum index of the common alignment tokens
4036         #                 of the two lines
4037         #   $imax_prev  = the maximum index of the common alignment tokens
4038         #                 with the line before $line_0 (=-1 of does not exist)
4039
4040         # Return:
4041         #   $is_marginal = true if the two lines should NOT be fully aligned
4042         #                = false if the two lines can remain fully aligned
4043         #   $imax_align  = the index of the highest alignment token shared by
4044         #                  these two lines to keep if the match is marginal.
4045
4046         # When we have an alignment group of just two lines like this, we are
4047         # working in the twilight zone of what looks good and what looks bad.
4048         # This routine is a collection of rules which work have been found to
4049         # work fairly well, but it will need to be updated from time to time.
4050
4051         my $is_marginal = 0;
4052
4053         # always keep alignments of a terminal else or ternary
4054         goto RETURN if ( defined( $line_1->get_j_terminal_match() ) );
4055
4056         # always align lists
4057         my $group_list_type = $line_0->get_list_type();
4058         goto RETURN if ($group_list_type);
4059
4060         # always align hanging side comments
4061         my $is_hanging_side_comment = $line_1->get_is_hanging_side_comment();
4062         goto RETURN if ($is_hanging_side_comment);
4063
4064         my $jmax_0           = $line_0->get_jmax();
4065         my $jmax_1           = $line_1->get_jmax();
4066         my $rtokens_1        = $line_1->get_rtokens();
4067         my $rtokens_0        = $line_0->get_rtokens();
4068         my $rfield_lengths_0 = $line_0->get_rfield_lengths();
4069         my $rfield_lengths_1 = $line_1->get_rfield_lengths();
4070         my $rpatterns_0      = $line_0->get_rpatterns();
4071         my $rpatterns_1      = $line_1->get_rpatterns();
4072         my $imax_next        = $line_1->get_imax_pair();
4073
4074         # We will scan the alignment tokens and set a flag '$is_marginal' if
4075         # it seems that the an alignment would look bad.
4076         my $max_pad            = 0;
4077         my $saw_good_alignment = 0;
4078         my $saw_if_or;                # if we saw an 'if' or 'or' at group level
4079         my $raw_tokb = EMPTY_STRING;  # first token seen at group level
4080         my $jfirst_bad;
4081         my $line_ending_fat_comma;    # is last token just a '=>' ?
4082         my $j0_eq_pad;
4083         my $j0_max_pad = 0;
4084
4085         foreach my $j ( 0 .. $jmax_1 - 2 ) {
4086             my ( $raw_tok, $lev, $tag, $tok_count ) =
4087               decode_alignment_token( $rtokens_1->[$j] );
4088             if ( $raw_tok && $lev == $group_level ) {
4089                 if ( !$raw_tokb ) { $raw_tokb = $raw_tok }
4090                 $saw_if_or ||= $is_if_or{$raw_tok};
4091             }
4092
4093             # When the first of the two lines ends in a bare '=>' this will
4094             # probably be marginal match.  (For a bare =>, the next field length
4095             # will be 2 or 3, depending on side comment)
4096             $line_ending_fat_comma =
4097                  $j == $jmax_1 - 2
4098               && $raw_tok eq '=>'
4099               && $rfield_lengths_0->[ $j + 1 ] <= 3;
4100
4101             my $pad = $rfield_lengths_1->[$j] - $rfield_lengths_0->[$j];
4102             if ( $j == 0 ) {
4103                 $pad += $line_1->get_leading_space_count() -
4104                   $line_0->get_leading_space_count();
4105
4106                 # Remember the pad at a leading equals
4107                 if ( $raw_tok eq '=' && $lev == $group_level ) {
4108                     $j0_eq_pad = $pad;
4109                     $j0_max_pad =
4110                       0.5 * ( $rfield_lengths_1->[0] + $rfield_lengths_0->[0] );
4111                     $j0_max_pad = 4 if ( $j0_max_pad < 4 );
4112                 }
4113             }
4114
4115             if ( $pad < 0 )        { $pad     = -$pad }
4116             if ( $pad > $max_pad ) { $max_pad = $pad }
4117             if ( $is_good_alignment{$raw_tok} && !$line_ending_fat_comma ) {
4118                 $saw_good_alignment = 1;
4119             }
4120             else {
4121                 $jfirst_bad = $j unless defined($jfirst_bad);
4122             }
4123             if ( $rpatterns_0->[$j] ne $rpatterns_1->[$j] ) {
4124
4125                 # Flag this as a marginal match since patterns differ.
4126                 # Normally, we will not allow just two lines to match if
4127                 # marginal. But we can allow matching in some specific cases.
4128
4129                 $jfirst_bad  = $j if ( !defined($jfirst_bad) );
4130                 $is_marginal = 1  if ( $is_marginal == 0 );
4131                 if ( $raw_tok eq '=' ) {
4132
4133                     # Here is an example of a marginal match:
4134                     #       $done{$$op} = 1;
4135                     #       $op         = compile_bblock($op);
4136                     # The left tokens are both identifiers, but
4137                     # one accesses a hash and the other doesn't.
4138                     # We'll let this be a tentative match and undo
4139                     # it later if we don't find more than 2 lines
4140                     # in the group.
4141                     $is_marginal = 2;
4142                 }
4143             }
4144         }
4145
4146         $is_marginal = 1 if ( $is_marginal == 0 && $line_ending_fat_comma );
4147
4148         # Turn off the "marginal match" flag in some cases...
4149         # A "marginal match" occurs when the alignment tokens agree
4150         # but there are differences in the other tokens (patterns).
4151         # If we leave the marginal match flag set, then the rule is that we
4152         # will align only if there are more than two lines in the group.
4153         # We will turn of the flag if we almost have a match
4154         # and either we have seen a good alignment token or we
4155         # just need a small pad (2 spaces) to fit.  These rules are
4156         # the result of experimentation.  Tokens which misaligned by just
4157         # one or two characters are annoying.  On the other hand,
4158         # large gaps to less important alignment tokens are also annoying.
4159         if ( $is_marginal == 1
4160             && ( $saw_good_alignment || $max_pad < 3 ) )
4161         {
4162             $is_marginal = 0;
4163         }
4164
4165         # We will use the line endings to help decide on alignments...
4166         # See if the lines end with semicolons...
4167         my $sc_term0;
4168         my $sc_term1;
4169         if ( $jmax_0 < 1 || $jmax_1 < 1 ) {
4170
4171             # shouldn't happen
4172         }
4173         else {
4174             my $pat0 = $rpatterns_0->[ $jmax_0 - 1 ];
4175             my $pat1 = $rpatterns_1->[ $jmax_1 - 1 ];
4176             $sc_term0 = $pat0 =~ /;b?$/;
4177             $sc_term1 = $pat1 =~ /;b?$/;
4178         }
4179
4180         if ( !$is_marginal && !$sc_term0 ) {
4181
4182             # First line of assignment should be semicolon terminated.
4183             # For example, do not align here:
4184             #  $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
4185             #    $$href{-NUM_DIRS} = 0;
4186             if ( $is_assignment{$raw_tokb} ) {
4187                 $is_marginal = 1;
4188             }
4189         }
4190
4191         # Try to avoid some undesirable alignments of opening tokens
4192         # for example, the space between grep and { here:
4193         #  return map { ( $_ => $_ ) }
4194         #    grep     { /$handles/ } $self->_get_delegate_method_list;
4195         $is_marginal ||=
4196              ( $raw_tokb eq '(' || $raw_tokb eq '{' )
4197           && $jmax_1 == 2
4198           && $sc_term0 ne $sc_term1;
4199
4200         ########################################
4201         # return unless this is a marginal match
4202         ########################################
4203         goto RETURN if ( !$is_marginal );
4204
4205         # Undo the marginal match flag in certain cases,
4206
4207         # Two lines with a leading equals-like operator are allowed to
4208         # align if the patterns to the left of the equals are the same.
4209         # For example the following two lines are a marginal match but have
4210         # the same left side patterns, so we will align the equals.
4211         #     my $orig = my $format = "^<<<<< ~~\n";
4212         #     my $abc  = "abc";
4213         # But these have a different left pattern so they will not be
4214         # aligned
4215         #     $xmldoc .= $`;
4216         #     $self->{'leftovers'} .= "<bx-seq:seq" . $';
4217
4218         # First line semicolon terminated but second not, usually ok:
4219         #               my $want = "'ab', 'a', 'b'";
4220         #               my $got  = join( ", ",
4221         #                    map { defined($_) ? "'$_'" : "undef" }
4222         #                          @got );
4223         #  First line not semicolon terminated, Not OK to match:
4224         #   $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
4225         #      $$href{-NUM_DIRS} = 0;
4226         my $pat0 = $rpatterns_0->[0];
4227         my $pat1 = $rpatterns_1->[0];
4228
4229         ##########################################################
4230         # Turn off the marginal flag for some types of assignments
4231         ##########################################################
4232         if ( $is_assignment{$raw_tokb} ) {
4233
4234             # undo marginal flag if first line is semicolon terminated
4235             # and leading patters match
4236             if ($sc_term0) {    # && $sc_term1) {
4237                 $is_marginal = $pat0 ne $pat1;
4238             }
4239         }
4240         elsif ( $raw_tokb eq '=>' ) {
4241
4242             # undo marginal flag if patterns match
4243             $is_marginal = $pat0 ne $pat1 || $line_ending_fat_comma;
4244         }
4245         elsif ( $raw_tokb eq '=~' ) {
4246
4247             # undo marginal flag if both lines are semicolon terminated
4248             # and leading patters match
4249             if ( $sc_term1 && $sc_term0 ) {
4250                 $is_marginal = $pat0 ne $pat1;
4251             }
4252         }
4253
4254         ######################################################
4255         # Turn off the marginal flag if we saw an 'if' or 'or'
4256         ######################################################
4257
4258         # A trailing 'if' and 'or' often gives a good alignment
4259         # For example, we can align these:
4260         #  return -1     if $_[0] =~ m/^CHAPT|APPENDIX/;
4261         #  return $1 + 0 if $_[0] =~ m/^SECT(\d*)$/;
4262
4263         # or
4264         #  $d_in_m[2] = 29          if ( &Date_LeapYear($y) );
4265         #  $d         = $d_in_m[$m] if ( $d > $d_in_m[$m] );
4266
4267         if ($saw_if_or) {
4268
4269             # undo marginal flag if both lines are semicolon terminated
4270             if ( $sc_term0 && $sc_term1 ) {
4271                 $is_marginal = 0;
4272             }
4273         }
4274
4275         # For a marginal match, only keep matches before the first 'bad' match
4276         if (   $is_marginal
4277             && defined($jfirst_bad)
4278             && $imax_align > $jfirst_bad - 1 )
4279         {
4280             $imax_align = $jfirst_bad - 1;
4281         }
4282
4283         ###########################################################
4284         # Allow sweep to match lines with leading '=' in some cases
4285         ###########################################################
4286         if ( $imax_align < 0 && defined($j0_eq_pad) ) {
4287
4288             if (
4289
4290                 # If there is a following line with leading equals, or
4291                 # preceding line with leading equals, then let the sweep align
4292                 # them without restriction.  For example, the first two lines
4293                 # here are a marginal match, but they are followed by a line
4294                 # with leading equals, so the sweep-lr logic can align all of
4295                 # the lines:
4296
4297                 #  $date[1] = $month_to_num{ $date[1] };            # <--line_0
4298                 #  @xdate   = split( /[:\/\s]/, $log->field('t') ); # <--line_1
4299                 #  $day     = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
4300                 #  $time    = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
4301
4302                 # Likewise, if we reverse the two pairs we want the same result
4303
4304                 #  $day     = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
4305                 #  $time    = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
4306                 #  $date[1] = $month_to_num{ $date[1] };            # <--line_0
4307                 #  @xdate   = split( /[:\/\s]/, $log->field('t') ); # <--line_1
4308
4309                 (
4310                        $imax_next >= 0
4311                     || $imax_prev >= 0
4312                     || TEST_MARGINAL_EQ_ALIGNMENT
4313                 )
4314                 && $j0_eq_pad >= -$j0_max_pad
4315                 && $j0_eq_pad <= $j0_max_pad
4316               )
4317             {
4318
4319                 # But do not do this if there is a comma before the '='.
4320                 # For example, the first two lines below have commas and
4321                 # therefore are not allowed to align with lines 3 & 4:
4322
4323                 # my ( $x, $y ) = $self->Size();                      #<--line_0
4324                 # my ( $left, $top, $right, $bottom ) = $self->Window(); #<--l_1
4325                 # my $vx = $right - $left;
4326                 # my $vy = $bottom - $top;
4327
4328                 if ( $rpatterns_0->[0] !~ /,/ && $rpatterns_1->[0] !~ /,/ ) {
4329                     $imax_align = 0;
4330                 }
4331             }
4332         }
4333
4334       RETURN:
4335         return ( $is_marginal, $imax_align );
4336     }
4337 }
4338
4339 sub get_extra_leading_spaces {
4340
4341     my ( $rlines, $rgroups ) = @_;
4342
4343     #----------------------------------------------------------
4344     # Define any extra indentation space (for the -lp option).
4345     # Here is why:
4346     # If a list has side comments, sub scan_list must dump the
4347     # list before it sees everything.  When this happens, it sets
4348     # the indentation to the standard scheme, but notes how
4349     # many spaces it would have liked to use.  We may be able
4350     # to recover that space here in the event that all of the
4351     # lines of a list are back together again.
4352     #----------------------------------------------------------
4353
4354     return 0 unless ( @{$rlines} && @{$rgroups} );
4355
4356     my $object = $rlines->[0]->get_indentation();
4357     return 0 unless ( ref($object) );
4358     my $extra_leading_spaces            = 0;
4359     my $extra_indentation_spaces_wanted = get_recoverable_spaces($object);
4360     return ($extra_leading_spaces) unless ($extra_indentation_spaces_wanted);
4361
4362     my $min_spaces = $extra_indentation_spaces_wanted;
4363     if ( $min_spaces > 0 ) { $min_spaces = 0 }
4364
4365     # loop over all groups
4366     my $ng      = -1;
4367     my $ngroups = @{$rgroups};
4368     foreach my $item ( @{$rgroups} ) {
4369         $ng++;
4370         my ( $jbeg, $jend ) = @{$item};
4371         foreach my $j ( $jbeg .. $jend ) {
4372             next if ( $j == 0 );
4373
4374             # all indentation objects must be the same
4375             if ( $object != $rlines->[$j]->get_indentation() ) {
4376                 return 0;
4377             }
4378         }
4379
4380        # find the maximum space without exceeding the line length for this group
4381         my $avail = $rlines->[$jbeg]->get_available_space_on_right();
4382         my $spaces =
4383           ( $avail > $extra_indentation_spaces_wanted )
4384           ? $extra_indentation_spaces_wanted
4385           : $avail;
4386
4387         #########################################################
4388         # Note: min spaces can be negative; for example with -gnu
4389         # f(
4390         #   do { 1; !!(my $x = bless []); }
4391         #  );
4392         #########################################################
4393         # The following rule is needed to match older formatting:
4394         # For multiple groups, we will keep spaces non-negative.
4395         # For a single group, we will allow a negative space.
4396         if ( $ngroups > 1 && $spaces < 0 ) { $spaces = 0 }
4397
4398         # update the minimum spacing
4399         if ( $ng == 0 || $spaces < $extra_leading_spaces ) {
4400             $extra_leading_spaces = $spaces;
4401         }
4402     }
4403
4404     # update the indentation object because with -icp the terminal
4405     # ');' will use the same adjustment.
4406     $object->permanently_decrease_available_spaces( -$extra_leading_spaces );
4407     return $extra_leading_spaces;
4408 }
4409
4410 sub forget_side_comment {
4411     my ($self) = @_;
4412     $self->[_last_side_comment_column_] = 0;
4413     return;
4414 }
4415
4416 sub is_good_side_comment_column {
4417     my ( $self, $line, $line_number, $level, $num5 ) = @_;
4418
4419     # Upon encountering the first side comment of a group, decide if
4420     # a previous side comment should be forgotten.  This involves
4421     # checking several rules.
4422
4423     # Return true to keep old comment location
4424     # Return false to forget old comment location
4425
4426     my $rfields                 = $line->get_rfields();
4427     my $is_hanging_side_comment = $line->get_is_hanging_side_comment();
4428
4429     # RULE1: Never forget comment before a hanging side comment
4430     goto KEEP if ($is_hanging_side_comment);
4431
4432     # RULE2: Forget a side comment after a short line difference,
4433     # where 'short line difference' is computed from a formula.
4434     # Using a smooth formula helps minimize sudden large changes.
4435     my $line_diff = $line_number - $self->[_last_side_comment_line_number_];
4436     my $alev_diff = abs( $level - $self->[_last_side_comment_level_] );
4437
4438     # '$num5' is the number of comments in the first 5 lines after the first
4439     # comment.  It is needed to keep a compact group of side comments from
4440     # being influenced by a more distant side comment.
4441     $num5 = 1 unless ($num5);
4442
4443     # Some values:
4444
4445     #        $adiff  $num5   $short_diff
4446     #        0       *       12
4447     #        1       1       6
4448     #        1       2       4
4449     #        1       3       3
4450     #        1       4       2
4451     #        2       1       4
4452     #        2       2       2
4453     #        2       3       1
4454     #        3       1       3
4455     #        3       2       1
4456
4457     my $short_diff = SC_LONG_LINE_DIFF / ( 1 + $alev_diff * $num5 );
4458
4459     goto FORGET
4460       if ( $line_diff > $short_diff
4461         || !$self->[_rOpts_valign_side_comments_] );
4462
4463     # RULE3: Forget a side comment if this line is at lower level and
4464     # ends a block
4465     my $last_sc_level = $self->[_last_side_comment_level_];
4466     goto FORGET
4467       if ( $level < $last_sc_level
4468         && $is_closing_block_type{ substr( $rfields->[0], 0, 1 ) } );
4469
4470     # RULE 4: Forget the last side comment if this comment might join a cached
4471     # line ...
4472     if ( my $cached_line_type = get_cached_line_type() ) {
4473
4474         # ... otherwise side comment alignment will get messed up.
4475         # For example, in the following test script
4476         # with using 'perltidy -sct -act=2', the last comment would try to
4477         # align with the previous and then be in the wrong column when
4478         # the lines are combined:
4479
4480         # foreach $line (
4481         #    [0, 1, 2], [3, 4, 5], [6, 7, 8],    # rows
4482         #    [0, 3, 6], [1, 4, 7], [2, 5, 8],    # columns
4483         #    [0, 4, 8], [2, 4, 6]
4484         #  )                                     # diagonals
4485         goto FORGET
4486           if ( $cached_line_type == 2 || $cached_line_type == 4 );
4487     }
4488
4489     # Otherwise, keep it alive
4490     goto KEEP;
4491
4492   FORGET:
4493     return 0;
4494
4495   KEEP:
4496     return 1;
4497 }
4498
4499 sub align_side_comments {
4500
4501     my ( $self, $rlines, $rgroups ) = @_;
4502
4503     # Align any side comments in this batch of lines
4504
4505     # Given:
4506     #  $rlines  - the lines
4507     #  $rgroups - the partition of the lines into groups
4508     #
4509     # We will be working group-by-group because all side comments
4510     # (real or fake) in each group are already aligned. So we just have
4511     # to make alignments between groups wherever possible.
4512
4513     # An unusual aspect is that within each group we have aligned both real
4514     # and fake side comments.  This has the consequence that the lengths of
4515     # long lines without real side comments can cause 'push' all side comments
4516     # to the right.  This seems unusual, but testing with and without this
4517     # feature shows that it is usually better this way.  Otherwise, side
4518     # comments can be hidden between long lines without side comments and
4519     # thus be harder to read.
4520
4521     my $group_level        = $self->[_group_level_];
4522     my $continuing_sc_flow = $self->[_last_side_comment_length_] > 0
4523       && $group_level == $self->[_last_level_written_];
4524
4525     # Find groups with side comments, and remember the first nonblank comment
4526     my $j_sc_beg;
4527     my @todo;
4528     my $ng = -1;
4529     foreach my $item ( @{$rgroups} ) {
4530         $ng++;
4531         my ( $jbeg, $jend ) = @{$item};
4532         foreach my $j ( $jbeg .. $jend ) {
4533             my $line = $rlines->[$j];
4534             my $jmax = $line->get_jmax();
4535             if ( $line->get_rfield_lengths()->[$jmax] ) {
4536
4537                 # this group has a line with a side comment
4538                 push @todo, $ng;
4539                 if ( !defined($j_sc_beg) ) {
4540                     $j_sc_beg = $j;
4541                 }
4542                 last;
4543             }
4544         }
4545     }
4546
4547     # done if no groups with side comments
4548     return unless @todo;
4549
4550     # Count $num5 = number of comments in the 5 lines after the first comment
4551     # This is an important factor in a decision formula
4552     my $num5 = 1;
4553     foreach my $jj ( $j_sc_beg + 1 .. @{$rlines} - 1 ) {
4554         my $ldiff = $jj - $j_sc_beg;
4555         last if ( $ldiff > 5 );
4556         my $line   = $rlines->[$jj];
4557         my $jmax   = $line->get_jmax();
4558         my $sc_len = $line->get_rfield_lengths()->[$jmax];
4559         next unless ($sc_len);
4560         $num5++;
4561     }
4562
4563     # Forget the old side comment location if necessary
4564     my $line_0 = $rlines->[$j_sc_beg];
4565     my $lnum =
4566       $j_sc_beg + $self->[_file_writer_object_]->get_output_line_number();
4567     my $keep_it =
4568       $self->is_good_side_comment_column( $line_0, $lnum, $group_level, $num5 );
4569     my $last_side_comment_column =
4570       $keep_it ? $self->[_last_side_comment_column_] : 0;
4571
4572     # If there are multiple groups we will do two passes
4573     # so that we can find a common alignment for all groups.
4574     my $MAX_PASS = @todo > 1 ? 2 : 1;
4575
4576     # Loop over passes
4577     my $max_comment_column = $last_side_comment_column;
4578     foreach my $PASS ( 1 .. $MAX_PASS ) {
4579
4580         # If there are two passes, then on the last pass make the old column
4581         # equal to the largest of the group.  This will result in the comments
4582         # being aligned if possible.
4583         if ( $PASS == $MAX_PASS ) {
4584             $last_side_comment_column = $max_comment_column;
4585         }
4586
4587         # Loop over the groups with side comments
4588         my $column_limit;
4589         foreach my $ng (@todo) {
4590             my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
4591
4592             # Note that since all lines in a group have common alignments, we
4593             # just have to work on one of the lines (the first line).
4594             my $line                    = $rlines->[$jbeg];
4595             my $jmax                    = $line->get_jmax();
4596             my $is_hanging_side_comment = $line->get_is_hanging_side_comment();
4597             last
4598               if ( $PASS < $MAX_PASS && $is_hanging_side_comment );
4599
4600             # the maximum space without exceeding the line length:
4601             my $avail = $line->get_available_space_on_right();
4602
4603             # try to use the previous comment column
4604             my $side_comment_column = $line->get_column( $jmax - 1 );
4605             my $move = $last_side_comment_column - $side_comment_column;
4606
4607             # Remember the maximum possible column of the first line with
4608             # side comment
4609             if ( !defined($column_limit) ) {
4610                 $column_limit = $side_comment_column + $avail;
4611             }
4612
4613             next if ( $jmax <= 0 );
4614
4615             # but if this doesn't work, give up and use the minimum space
4616             my $min_move = $self->[_rOpts_minimum_space_to_comment_] - 1;
4617             if ( $move > $avail ) {
4618                 $move = $min_move;
4619             }
4620
4621             # but we want some minimum space to the comment
4622             if (   $move >= 0
4623                 && $j_sc_beg == 0
4624                 && $continuing_sc_flow )
4625             {
4626                 $min_move = 0;
4627             }
4628
4629             # remove constraints on hanging side comments
4630             if ($is_hanging_side_comment) { $min_move = 0 }
4631
4632             if ( $move < $min_move ) {
4633                 $move = $min_move;
4634             }
4635
4636             # don't exceed the available space
4637             if ( $move > $avail ) { $move = $avail }
4638
4639             # We can only increase space, never decrease.
4640             if ( $move < 0 ) { $move = 0 }
4641
4642             # Discover the largest column on the preliminary  pass
4643             if ( $PASS < $MAX_PASS ) {
4644                 my $col = $line->get_column( $jmax - 1 ) + $move;
4645
4646                 # but ignore columns too large for the starting line
4647                 if ( $col > $max_comment_column && $col < $column_limit ) {
4648                     $max_comment_column = $col;
4649                 }
4650             }
4651
4652             # Make the changes on the final pass
4653             else {
4654                 $line->increase_field_width( $jmax - 1, $move );
4655
4656                 # remember this column for the next group
4657                 $last_side_comment_column = $line->get_column( $jmax - 1 );
4658             }
4659         } ## end loop over groups
4660     } ## end loop over passes
4661
4662     # Find the last side comment
4663     my $j_sc_last;
4664     my $ng_last = $todo[-1];
4665     my ( $jbeg, $jend ) = @{ $rgroups->[$ng_last] };
4666     foreach my $jj ( reverse( $jbeg .. $jend ) ) {
4667         my $line = $rlines->[$jj];
4668         my $jmax = $line->get_jmax();
4669         if ( $line->get_rfield_lengths()->[$jmax] ) {
4670             $j_sc_last = $jj;
4671             last;
4672         }
4673     }
4674
4675     # Save final side comment info for possible use by the next batch
4676     if ( defined($j_sc_last) ) {
4677         my $line_number =
4678           $self->[_file_writer_object_]->get_output_line_number() + $j_sc_last;
4679         $self->[_last_side_comment_column_]      = $last_side_comment_column;
4680         $self->[_last_side_comment_line_number_] = $line_number;
4681         $self->[_last_side_comment_level_]       = $group_level;
4682     }
4683     return;
4684 }
4685
4686 ###############################
4687 # CODE SECTION 6: Output Step A
4688 ###############################
4689
4690 sub valign_output_step_A {
4691
4692     ###############################################################
4693     # This is Step A in writing vertically aligned lines.
4694     # The line is prepared according to the alignments which have
4695     # been found. Then it is shipped to the next step.
4696     ###############################################################
4697
4698     my ( $self, $rinput_hash ) = @_;
4699
4700     my $line                 = $rinput_hash->{line};
4701     my $min_ci_gap           = $rinput_hash->{min_ci_gap};
4702     my $do_not_align         = $rinput_hash->{do_not_align};
4703     my $group_leader_length  = $rinput_hash->{group_leader_length};
4704     my $extra_leading_spaces = $rinput_hash->{extra_leading_spaces};
4705     my $level                = $rinput_hash->{level};
4706     my $maximum_line_length  = $rinput_hash->{maximum_line_length};
4707
4708     my $rfields                   = $line->get_rfields();
4709     my $rfield_lengths            = $line->get_rfield_lengths();
4710     my $leading_space_count       = $line->get_leading_space_count();
4711     my $outdent_long_lines        = $line->get_outdent_long_lines();
4712     my $maximum_field_index       = $line->get_jmax();
4713     my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
4714     my $Kend                      = $line->get_Kend();
4715     my $level_end                 = $line->get_level_end();
4716
4717     # add any extra spaces
4718     if ( $leading_space_count > $group_leader_length ) {
4719         $leading_space_count += $min_ci_gap;
4720     }
4721
4722     my $str     = $rfields->[0];
4723     my $str_len = $rfield_lengths->[0];
4724
4725     # loop to concatenate all fields of this line and needed padding
4726     my $total_pad_count = 0;
4727     for my $j ( 1 .. $maximum_field_index ) {
4728
4729         # skip zero-length side comments
4730         last
4731           if (
4732             ( $j == $maximum_field_index )
4733             && ( !defined( $rfields->[$j] )
4734                 || ( $rfield_lengths->[$j] == 0 ) )
4735           );
4736
4737         # compute spaces of padding before this field
4738         my $col = $line->get_column( $j - 1 );
4739         my $pad = $col - ( $str_len + $leading_space_count );
4740
4741         if ($do_not_align) {
4742             $pad =
4743               ( $j < $maximum_field_index )
4744               ? 0
4745               : $self->[_rOpts_minimum_space_to_comment_] - 1;
4746         }
4747
4748         # if the -fpsc flag is set, move the side comment to the selected
4749         # column if and only if it is possible, ignoring constraints on
4750         # line length and minimum space to comment
4751         if (   $self->[_rOpts_fixed_position_side_comment_]
4752             && $j == $maximum_field_index )
4753         {
4754             my $newpad =
4755               $pad + $self->[_rOpts_fixed_position_side_comment_] - $col - 1;
4756             if ( $newpad >= 0 ) { $pad = $newpad; }
4757         }
4758
4759         # accumulate the padding
4760         if ( $pad > 0 ) { $total_pad_count += $pad; }
4761
4762         # only add padding when we have a finite field;
4763         # this avoids extra terminal spaces if we have empty fields
4764         if ( $rfield_lengths->[$j] > 0 ) {
4765             $str .= SPACE x $total_pad_count;
4766             $str_len += $total_pad_count;
4767             $total_pad_count = 0;
4768             $str .= $rfields->[$j];
4769             $str_len += $rfield_lengths->[$j];
4770         }
4771         else {
4772             $total_pad_count = 0;
4773         }
4774     }
4775
4776     my $side_comment_length = $rfield_lengths->[$maximum_field_index];
4777
4778     # ship this line off
4779     $self->valign_output_step_B(
4780         {
4781             leading_space_count => $leading_space_count + $extra_leading_spaces,
4782             line                => $str,
4783             line_length         => $str_len,
4784             side_comment_length => $side_comment_length,
4785             outdent_long_lines  => $outdent_long_lines,
4786             rvertical_tightness_flags => $rvertical_tightness_flags,
4787             level                     => $level,
4788             level_end                 => $level_end,
4789             Kend                      => $Kend,
4790             maximum_line_length       => $maximum_line_length,
4791         }
4792     );
4793     return;
4794 }
4795
4796 sub combine_fields {
4797
4798     # We have a group of two lines for which we do not want to align tokens
4799     # between index $imax_align and the side comment.  So we will delete fields
4800     # between $imax_align and the side comment.  Alignments have already
4801     # been set so we have to adjust them.
4802
4803     my ( $line_0, $line_1, $imax_align ) = @_;
4804
4805     if ( !defined($imax_align) ) { $imax_align = -1 }
4806
4807     # First delete the unwanted tokens
4808     my $jmax_old       = $line_0->get_jmax();
4809     my @old_alignments = $line_0->get_alignments();
4810     my @idel           = ( $imax_align + 1 .. $jmax_old - 2 );
4811
4812     return unless (@idel);
4813
4814     foreach my $line ( $line_0, $line_1 ) {
4815         delete_selected_tokens( $line, \@idel );
4816     }
4817
4818     # Now adjust the alignments.  Note that the side comment alignment
4819     # is always at jmax-1, and there is an ending alignment at jmax.
4820     my @new_alignments;
4821     if ( $imax_align >= 0 ) {
4822         @new_alignments[ 0 .. $imax_align ] =
4823           @old_alignments[ 0 .. $imax_align ];
4824     }
4825
4826     my $jmax_new = $line_0->get_jmax();
4827
4828     $new_alignments[ $jmax_new - 1 ] = $old_alignments[ $jmax_old - 1 ];
4829     $new_alignments[$jmax_new] = $old_alignments[$jmax_old];
4830     $line_0->set_alignments(@new_alignments);
4831     $line_1->set_alignments(@new_alignments);
4832     return;
4833 }
4834
4835 sub get_output_line_number {
4836
4837     # The output line number reported to a caller =
4838     # the number of items still in the buffer +
4839     # the number of items written.
4840     return $_[0]->group_line_count() +
4841       $_[0]->[_file_writer_object_]->get_output_line_number();
4842 }
4843
4844 ###############################
4845 # CODE SECTION 7: Output Step B
4846 ###############################
4847
4848 {    ## closure for sub valign_output_step_B
4849
4850     # These are values for a cache used by valign_output_step_B.
4851     my $cached_line_text;
4852     my $cached_line_text_length;
4853     my $cached_line_type;
4854     my $cached_line_opening_flag;
4855     my $cached_line_closing_flag;
4856     my $cached_seqno;
4857     my $cached_line_valid;
4858     my $cached_line_leading_space_count;
4859     my $cached_seqno_string;
4860     my $cached_line_Kend;
4861     my $cached_line_maximum_length;
4862
4863     # These are passed to step_C:
4864     my $seqno_string;
4865     my $last_nonblank_seqno_string;
4866
4867     sub set_last_nonblank_seqno_string {
4868         my ($val) = @_;
4869         $last_nonblank_seqno_string = $val;
4870         return;
4871     }
4872
4873     sub get_cached_line_opening_flag {
4874         return $cached_line_opening_flag;
4875     }
4876
4877     sub get_cached_line_type {
4878         return $cached_line_type;
4879     }
4880
4881     sub set_cached_line_valid {
4882         my ($val) = @_;
4883         $cached_line_valid = $val;
4884         return;
4885     }
4886
4887     sub get_cached_seqno {
4888         return $cached_seqno;
4889     }
4890
4891     sub initialize_step_B_cache {
4892
4893         # valign_output_step_B cache:
4894         $cached_line_text                = EMPTY_STRING;
4895         $cached_line_text_length         = 0;
4896         $cached_line_type                = 0;
4897         $cached_line_opening_flag        = 0;
4898         $cached_line_closing_flag        = 0;
4899         $cached_seqno                    = 0;
4900         $cached_line_valid               = 0;
4901         $cached_line_leading_space_count = 0;
4902         $cached_seqno_string             = EMPTY_STRING;
4903         $cached_line_Kend                = undef;
4904         $cached_line_maximum_length      = undef;
4905
4906         # These vars hold a string of sequence numbers joined together used by
4907         # the cache
4908         $seqno_string               = EMPTY_STRING;
4909         $last_nonblank_seqno_string = EMPTY_STRING;
4910         return;
4911     }
4912
4913     sub _flush_cache {
4914         my ($self) = @_;
4915         if ($cached_line_type) {
4916             $seqno_string = $cached_seqno_string;
4917             $self->valign_output_step_C(
4918                 $seqno_string,
4919                 $last_nonblank_seqno_string,
4920
4921                 $cached_line_text,
4922                 $cached_line_leading_space_count,
4923                 $self->[_last_level_written_],
4924                 $cached_line_Kend,
4925             );
4926             $cached_line_type           = 0;
4927             $cached_line_text           = EMPTY_STRING;
4928             $cached_line_text_length    = 0;
4929             $cached_seqno_string        = EMPTY_STRING;
4930             $cached_line_Kend           = undef;
4931             $cached_line_maximum_length = undef;
4932         }
4933         return;
4934     }
4935
4936     sub valign_output_step_B {
4937
4938         ###############################################################
4939         # This is Step B in writing vertically aligned lines.
4940         # Vertical tightness is applied according to preset flags.
4941         # In particular this routine handles stacking of opening
4942         # and closing tokens.
4943         ###############################################################
4944
4945         my ( $self, $rinput ) = @_;
4946
4947         my $leading_space_count       = $rinput->{leading_space_count};
4948         my $str                       = $rinput->{line};
4949         my $str_length                = $rinput->{line_length};
4950         my $side_comment_length       = $rinput->{side_comment_length};
4951         my $outdent_long_lines        = $rinput->{outdent_long_lines};
4952         my $rvertical_tightness_flags = $rinput->{rvertical_tightness_flags};
4953         my $level                     = $rinput->{level};
4954         my $level_end                 = $rinput->{level_end};
4955         my $Kend                      = $rinput->{Kend};
4956         my $maximum_line_length       = $rinput->{maximum_line_length};
4957
4958         my $last_level_written = $self->[_last_level_written_];
4959
4960         # Useful -gcs test cases for wide characters are
4961         # perl527/(method.t.2, reg_mesg.t, mime-header.t)
4962
4963         # handle outdenting of long lines:
4964         my $is_outdented_line;
4965         if ($outdent_long_lines) {
4966             my $excess =
4967               $str_length -
4968               $side_comment_length +
4969               $leading_space_count -
4970               $maximum_line_length;
4971             if ( $excess > 0 ) {
4972                 $leading_space_count = 0;
4973                 my $file_writer_object = $self->[_file_writer_object_];
4974                 my $last_outdented_line_at =
4975                   $file_writer_object->get_output_line_number();
4976                 $self->[_last_outdented_line_at_] = $last_outdented_line_at;
4977
4978                 my $outdented_line_count = $self->[_outdented_line_count_];
4979                 unless ($outdented_line_count) {
4980                     $self->[_first_outdented_line_at_] =
4981                       $last_outdented_line_at;
4982                 }
4983                 $outdented_line_count++;
4984                 $self->[_outdented_line_count_] = $outdented_line_count;
4985                 $is_outdented_line = 1;
4986             }
4987         }
4988
4989         # Make preliminary leading whitespace.  It could get changed
4990         # later by entabbing, so we have to keep track of any changes
4991         # to the leading_space_count from here on.
4992         my $leading_string =
4993           $leading_space_count > 0
4994           ? ( SPACE x $leading_space_count )
4995           : EMPTY_STRING;
4996         my $leading_string_length = length($leading_string);
4997
4998         # Unpack any recombination data; it was packed by
4999         # sub 'Formatter::set_vertical_tightness_flags'
5000
5001         # old   hash              Meaning
5002         # index key
5003         #
5004         # 0   _vt_type:           1=opening non-block    2=closing non-block
5005         #                         3=opening block brace  4=closing block brace
5006         #
5007         # 1a  _vt_opening_flag:  1=no multiple steps, 2=multiple steps ok
5008         # 1b  _vt_closing_flag:    spaces of padding to use if closing
5009         # 2   _vt_seqno:          sequence number of container
5010         # 3   _vt_valid flag:     do not append if this flag is false. Will be
5011         #           true if appropriate -vt flag is set.  Otherwise, Will be
5012         #           made true only for 2 line container in parens with -lp
5013         # 4   _vt_seqno_beg:      sequence number of first token of line
5014         # 5   _vt_seqno_end:      sequence number of last token of line
5015         # 6   _vt_min_lines:      min number of lines for joining opening cache,
5016         #                           0=no constraint
5017         # 7   _vt_max_lines:      max number of lines for joining opening cache,
5018         #                           0=no constraint
5019
5020         my ( $open_or_close, $opening_flag, $closing_flag, $seqno, $valid,
5021             $seqno_beg, $seqno_end );
5022         if ($rvertical_tightness_flags) {
5023
5024             $open_or_close = $rvertical_tightness_flags->{_vt_type};
5025             $opening_flag  = $rvertical_tightness_flags->{_vt_opening_flag};
5026             $closing_flag  = $rvertical_tightness_flags->{_vt_closing_flag};
5027             $seqno         = $rvertical_tightness_flags->{_vt_seqno};
5028             $valid         = $rvertical_tightness_flags->{_vt_valid_flag};
5029             $seqno_beg     = $rvertical_tightness_flags->{_vt_seqno_beg};
5030             $seqno_end     = $rvertical_tightness_flags->{_vt_seqno_end};
5031         }
5032
5033         $seqno_string = $seqno_end;
5034
5035         # handle any cached line ..
5036         # either append this line to it or write it out
5037         # Note: the function length() is used in this next test out of caution.
5038         # All testing has shown that the variable $cached_line_text_length is
5039         # correct, but its calculation is complex and a loss of cached text
5040         # would be a disaster.
5041         if ( length($cached_line_text) ) {
5042
5043             # Dump an invalid cached line
5044             if ( !$cached_line_valid ) {
5045                 $self->valign_output_step_C(
5046                     $seqno_string,
5047                     $last_nonblank_seqno_string,
5048
5049                     $cached_line_text,
5050                     $cached_line_leading_space_count,
5051                     $last_level_written,
5052                     $cached_line_Kend
5053                 );
5054             }
5055
5056             # Handle cached line ending in OPENING tokens
5057             elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
5058
5059                 my $gap = $leading_space_count - $cached_line_text_length;
5060
5061                 # handle option of just one tight opening per line:
5062                 if ( $cached_line_opening_flag == 1 ) {
5063                     if ( defined($open_or_close) && $open_or_close == 1 ) {
5064                         $gap = -1;
5065                     }
5066                 }
5067
5068                 # Do not join the lines if this might produce a one-line
5069                 # container which exceeds the maximum line length.  This is
5070                 # necessary prevent blinking, particularly with the combination
5071                 # -xci -pvt=2.  In that case a one-line block alternately forms
5072                 # and breaks, causing -xci to alternately turn on and off (case
5073                 # b765).
5074                 # Patched to fix cases b656 b862 b971 b972: always do the check
5075                 # if the maximum line length changes (due to -vmll).
5076                 if (
5077                     $gap >= 0
5078                     && ( $maximum_line_length != $cached_line_maximum_length
5079                         || ( defined($level_end) && $level > $level_end ) )
5080                   )
5081                 {
5082                     my $test_line_length =
5083                       $cached_line_text_length + $gap + $str_length;
5084
5085                     # Add a small tolerance in the length test (fixes case b862)
5086                     if ( $test_line_length > $cached_line_maximum_length - 2 ) {
5087                         $gap = -1;
5088                     }
5089                 }
5090
5091                 if ( $gap >= 0 && defined($seqno_beg) ) {
5092                     $maximum_line_length   = $cached_line_maximum_length;
5093                     $leading_string        = $cached_line_text . SPACE x $gap;
5094                     $leading_string_length = $cached_line_text_length + $gap;
5095                     $leading_space_count   = $cached_line_leading_space_count;
5096                     $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
5097                     $level        = $last_level_written;
5098                 }
5099                 else {
5100                     $self->valign_output_step_C(
5101                         $seqno_string,
5102                         $last_nonblank_seqno_string,
5103
5104                         $cached_line_text,
5105                         $cached_line_leading_space_count,
5106                         $last_level_written,
5107                         $cached_line_Kend
5108                     );
5109                 }
5110             }
5111
5112             # Handle cached line ending in CLOSING tokens
5113             else {
5114                 my $test_line =
5115                   $cached_line_text . SPACE x $cached_line_closing_flag . $str;
5116                 my $test_line_length =
5117                   $cached_line_text_length +
5118                   $cached_line_closing_flag +
5119                   $str_length;
5120                 if (
5121
5122                     # The new line must start with container
5123                     $seqno_beg
5124
5125                     # The container combination must be okay..
5126                     && (
5127
5128                         # okay to combine like types
5129                         ( $open_or_close == $cached_line_type )
5130
5131                         # closing block brace may append to non-block
5132                         || ( $cached_line_type == 2 && $open_or_close == 4 )
5133
5134                         # something like ');'
5135                         || ( !$open_or_close && $cached_line_type == 2 )
5136
5137                     )
5138
5139                     # The combined line must fit
5140                     && ( $test_line_length <= $cached_line_maximum_length )
5141                   )
5142                 {
5143
5144                     $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
5145
5146                     # Patch to outdent closing tokens ending # in ');' If we
5147                     # are joining a line like ');' to a previous stacked set of
5148                     # closing tokens, then decide if we may outdent the
5149                     # combined stack to the indentation of the ');'.  Since we
5150                     # should not normally outdent any of the other tokens more
5151                     # than the indentation of the lines that contained them, we
5152                     # will only do this if all of the corresponding opening
5153                     # tokens were on the same line.  This can happen with -sot
5154                     # and -sct.
5155
5156                     # For example, it is ok here:
5157                     #   __PACKAGE__->load_components( qw(
5158                     #         PK::Auto
5159                     #         Core
5160                     #   ));
5161                     #
5162                     # But, for example, we do not outdent in this example
5163                     # because that would put the closing sub brace out farther
5164                     # than the opening sub brace:
5165                     #
5166                     #   perltidy -sot -sct
5167                     #   $c->Tk::bind(
5168                     #       '<Control-f>' => sub {
5169                     #           my ($c) = @_;
5170                     #           my $e = $c->XEvent;
5171                     #           itemsUnderArea $c;
5172                     #       } );
5173                     #
5174                     if (   $str =~ /^\);/
5175                         && $cached_line_text =~ /^[\)\}\]\s]*$/ )
5176                     {
5177
5178                         # The way to tell this is if the stacked sequence
5179                         # numbers of this output line are the reverse of the
5180                         # stacked sequence numbers of the previous non-blank
5181                         # line of sequence numbers.  So we can join if the
5182                         # previous nonblank string of tokens is the mirror
5183                         # image.  For example if stack )}] is 13:8:6 then we
5184                         # are looking for a leading stack like [{( which
5185                         # is 6:8:13. We only need to check the two ends,
5186                         # because the intermediate tokens must fall in order.
5187                         # Note on speed: having to split on colons and
5188                         # eliminate multiple colons might appear to be slow,
5189                         # but it's not an issue because we almost never come
5190                         # through here.  In a typical file we don't.
5191
5192                         $seqno_string               =~ s/^:+//;
5193                         $last_nonblank_seqno_string =~ s/^:+//;
5194                         $seqno_string               =~ s/:+/:/g;
5195                         $last_nonblank_seqno_string =~ s/:+/:/g;
5196
5197                         # how many spaces can we outdent?
5198                         my $diff =
5199                           $cached_line_leading_space_count -
5200                           $leading_space_count;
5201                         if (   $diff > 0
5202                             && length($seqno_string)
5203                             && length($last_nonblank_seqno_string) ==
5204                             length($seqno_string) )
5205                         {
5206                             my @seqno_last =
5207                               ( split /:/, $last_nonblank_seqno_string );
5208                             my @seqno_now = ( split /:/, $seqno_string );
5209                             if (   @seqno_now
5210                                 && @seqno_last
5211                                 && $seqno_now[-1] == $seqno_last[0]
5212                                 && $seqno_now[0] == $seqno_last[-1] )
5213                             {
5214
5215                                 # OK to outdent ..
5216                                 # for absolute safety, be sure we only remove
5217                                 # whitespace
5218                                 my $ws = substr( $test_line, 0, $diff );
5219                                 if ( ( length($ws) == $diff )
5220                                     && $ws =~ /^\s+$/ )
5221                                 {
5222
5223                                     $test_line = substr( $test_line, $diff );
5224                                     $cached_line_leading_space_count -= $diff;
5225                                     $last_level_written =
5226                                       $self->level_change(
5227                                         $cached_line_leading_space_count,
5228                                         $diff, $last_level_written );
5229                                     $self->reduce_valign_buffer_indentation(
5230                                         $diff);
5231                                 }
5232
5233                                 # shouldn't happen, but not critical:
5234                                 ##else {
5235                                 ## ERROR transferring indentation here
5236                                 ##}
5237                             }
5238                         }
5239                     }
5240
5241                     # Change the args to look like we received the combined line
5242                     $str                   = $test_line;
5243                     $str_length            = $test_line_length;
5244                     $leading_string        = EMPTY_STRING;
5245                     $leading_string_length = 0;
5246                     $leading_space_count   = $cached_line_leading_space_count;
5247                     $level                 = $last_level_written;
5248                     $maximum_line_length   = $cached_line_maximum_length;
5249                 }
5250                 else {
5251                     $self->valign_output_step_C(
5252                         $seqno_string,
5253                         $last_nonblank_seqno_string,
5254
5255                         $cached_line_text,
5256                         $cached_line_leading_space_count,
5257                         $last_level_written,
5258                         $cached_line_Kend
5259                     );
5260                 }
5261             }
5262         }
5263         $cached_line_type           = 0;
5264         $cached_line_text           = EMPTY_STRING;
5265         $cached_line_text_length    = 0;
5266         $cached_line_Kend           = undef;
5267         $cached_line_maximum_length = undef;
5268
5269         # make the line to be written
5270         my $line        = $leading_string . $str;
5271         my $line_length = $leading_string_length + $str_length;
5272
5273         # Safety check: be sure that a line to be cached as a stacked block
5274         # brace line ends in the appropriate opening or closing block brace.
5275         # This should always be the case if the caller set flags correctly.
5276         # Code '3' is for -sobb, code '4' is for -scbb.
5277         if ($open_or_close) {
5278             if (   $open_or_close == 3 && $line !~ /\{\s*$/
5279                 || $open_or_close == 4 && $line !~ /\}\s*$/ )
5280             {
5281                 $open_or_close = 0;
5282             }
5283         }
5284
5285         # write or cache this line ...
5286         # fix for case b999: do not cache an outdented line
5287         if ( !$open_or_close || $side_comment_length > 0 || $is_outdented_line )
5288         {
5289             $self->valign_output_step_C(
5290                 $seqno_string,
5291                 $last_nonblank_seqno_string,
5292
5293                 $line,
5294                 $leading_space_count,
5295                 $level,
5296                 $Kend
5297             );
5298         }
5299         else {
5300             $cached_line_text                = $line;
5301             $cached_line_text_length         = $line_length;
5302             $cached_line_type                = $open_or_close;
5303             $cached_line_opening_flag        = $opening_flag;
5304             $cached_line_closing_flag        = $closing_flag;
5305             $cached_seqno                    = $seqno;
5306             $cached_line_valid               = $valid;
5307             $cached_line_leading_space_count = $leading_space_count;
5308             $cached_seqno_string             = $seqno_string;
5309             $cached_line_Kend                = $Kend;
5310             $cached_line_maximum_length      = $maximum_line_length;
5311         }
5312
5313         $self->[_last_level_written_]       = $level;
5314         $self->[_last_side_comment_length_] = $side_comment_length;
5315         return;
5316     }
5317 }
5318
5319 ###############################
5320 # CODE SECTION 8: Output Step C
5321 ###############################
5322
5323 {    ## closure for sub valign_output_step_C
5324
5325     # Vertical alignment buffer used by valign_output_step_C
5326     my $valign_buffer_filling;
5327     my @valign_buffer;
5328
5329     sub initialize_valign_buffer {
5330         @valign_buffer         = ();
5331         $valign_buffer_filling = EMPTY_STRING;
5332         return;
5333     }
5334
5335     sub dump_valign_buffer {
5336         my ($self) = @_;
5337         if (@valign_buffer) {
5338             foreach (@valign_buffer) {
5339                 $self->valign_output_step_D( @{$_} );
5340             }
5341             @valign_buffer = ();
5342         }
5343         $valign_buffer_filling = EMPTY_STRING;
5344         return;
5345     }
5346
5347     sub reduce_valign_buffer_indentation {
5348
5349         my ( $self, $diff ) = @_;
5350         if ( $valign_buffer_filling && $diff ) {
5351             my $max_valign_buffer = @valign_buffer;
5352             foreach my $i ( 0 .. $max_valign_buffer - 1 ) {
5353                 my ( $line, $leading_space_count, $level, $Kend ) =
5354                   @{ $valign_buffer[$i] };
5355                 my $ws = substr( $line, 0, $diff );
5356                 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
5357                     $line = substr( $line, $diff );
5358                 }
5359                 if ( $leading_space_count >= $diff ) {
5360                     $leading_space_count -= $diff;
5361                     $level =
5362                       $self->level_change( $leading_space_count, $diff,
5363                         $level );
5364                 }
5365                 $valign_buffer[$i] =
5366                   [ $line, $leading_space_count, $level, $Kend ];
5367             }
5368         }
5369         return;
5370     }
5371
5372     sub valign_output_step_C {
5373
5374         ###############################################################
5375         # This is Step C in writing vertically aligned lines.
5376         # Lines are either stored in a buffer or passed along to the next step.
5377         # The reason for storing lines is that we may later want to reduce their
5378         # indentation when -sot and -sct are both used.
5379         ###############################################################
5380         my (
5381             $self,
5382             $seqno_string,
5383             $last_nonblank_seqno_string,
5384
5385             @args_to_D
5386         ) = @_;
5387
5388         # Dump any saved lines if we see a line with an unbalanced opening or
5389         # closing token.
5390         $self->dump_valign_buffer()
5391           if ( $seqno_string && $valign_buffer_filling );
5392
5393         # Either store or write this line
5394         if ($valign_buffer_filling) {
5395             push @valign_buffer, [@args_to_D];
5396         }
5397         else {
5398             $self->valign_output_step_D(@args_to_D);
5399         }
5400
5401         # For lines starting or ending with opening or closing tokens..
5402         if ($seqno_string) {
5403             $last_nonblank_seqno_string = $seqno_string;
5404             set_last_nonblank_seqno_string($seqno_string);
5405
5406             # Start storing lines when we see a line with multiple stacked
5407             # opening tokens.
5408             # patch for RT #94354, requested by Colin Williams
5409             if (   $seqno_string =~ /^\d+(\:+\d+)+$/
5410                 && $args_to_D[0] !~ /^[\}\)\]\:\?]/ )
5411             {
5412
5413                 # This test is efficient but a little subtle: The first test
5414                 # says that we have multiple sequence numbers and hence
5415                 # multiple opening or closing tokens in this line.  The second
5416                 # part of the test rejects stacked closing and ternary tokens.
5417                 # So if we get here then we should have stacked unbalanced
5418                 # opening tokens.
5419
5420                 # Here is a complex example:
5421
5422                 # Foo($Bar[0], {  # (side comment)
5423                 #     baz => 1,
5424                 # });
5425
5426                 # The first line has sequence 6::4.  It does not begin with
5427                 # a closing token or ternary, so it passes the test and must be
5428                 # stacked opening tokens.
5429
5430                 # The last line has sequence 4:6 but is a stack of closing
5431                 # tokens, so it gets rejected.
5432
5433                 # Note that the sequence number of an opening token for a qw
5434                 # quote is a negative number and will be rejected.  For
5435                 # example, for the following line: skip_symbols([qw(
5436                 # $seqno_string='10:5:-1'.  It would be okay to accept it but I
5437                 # decided not to do this after testing.
5438
5439                 $valign_buffer_filling = $seqno_string;
5440
5441             }
5442         }
5443         return;
5444     }
5445 }
5446
5447 ###############################
5448 # CODE SECTION 9: Output Step D
5449 ###############################
5450
5451 sub valign_output_step_D {
5452
5453     ###############################################################
5454     # This is Step D in writing vertically aligned lines.
5455     # It is the end of the vertical alignment pipeline.
5456     # Write one vertically aligned line of code to the output object.
5457     ###############################################################
5458
5459     my ( $self, $line, $leading_space_count, $level, $Kend ) = @_;
5460
5461     # The line is currently correct if there is no tabbing (recommended!)
5462     # We may have to lop off some leading spaces and replace with tabs.
5463     if ( $leading_space_count > 0 ) {
5464
5465         my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
5466         my $rOpts_tabs           = $self->[_rOpts_tabs_];
5467         my $rOpts_entab_leading_whitespace =
5468           $self->[_rOpts_entab_leading_whitespace_];
5469
5470         # Nothing to do if no tabs
5471         if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
5472             || $rOpts_indent_columns <= 0 )
5473         {
5474
5475             # nothing to do
5476         }
5477
5478         # Handle entab option
5479         elsif ($rOpts_entab_leading_whitespace) {
5480
5481             # Patch 12-nov-2018 based on report from Glenn. Extra padding was
5482             # not correctly entabbed, nor were side comments: Increase leading
5483             # space count for a padded line to get correct tabbing
5484             if ( $line =~ /^(\s+)(.*)$/ ) {
5485                 my $spaces = length($1);
5486                 if ( $spaces > $leading_space_count ) {
5487                     $leading_space_count = $spaces;
5488                 }
5489             }
5490
5491             my $space_count =
5492               $leading_space_count % $rOpts_entab_leading_whitespace;
5493             my $tab_count =
5494               int( $leading_space_count / $rOpts_entab_leading_whitespace );
5495             my $leading_string = "\t" x $tab_count . SPACE x $space_count;
5496             if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
5497                 substr( $line, 0, $leading_space_count ) = $leading_string;
5498             }
5499             else {
5500
5501                 # shouldn't happen - program error counting whitespace
5502                 # - skip entabbing
5503                 DEBUG_TABS
5504                   && warning(
5505 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
5506                   );
5507             }
5508         }
5509
5510         # Handle option of one tab per level
5511         else {
5512             my $leading_string = ( "\t" x $level );
5513             my $space_count =
5514               $leading_space_count - $level * $rOpts_indent_columns;
5515
5516             # shouldn't happen:
5517             if ( $space_count < 0 ) {
5518
5519                 # But it could be an outdented comment
5520                 if ( $line !~ /^\s*#/ ) {
5521                     DEBUG_TABS
5522                       && warning(
5523 "Error entabbing in valign_output_step_D: for level=$level count=$leading_space_count\n"
5524                       );
5525                 }
5526                 $leading_string = ( SPACE x $leading_space_count );
5527             }
5528             else {
5529                 $leading_string .= ( SPACE x $space_count );
5530             }
5531             if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
5532                 substr( $line, 0, $leading_space_count ) = $leading_string;
5533             }
5534             else {
5535
5536                 # shouldn't happen - program error counting whitespace
5537                 # we'll skip entabbing
5538                 DEBUG_TABS
5539                   && warning(
5540 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
5541                   );
5542             }
5543         }
5544     }
5545     my $file_writer_object = $self->[_file_writer_object_];
5546     $file_writer_object->write_code_line( $line . "\n", $Kend );
5547
5548     return;
5549 }
5550
5551 {    ## closure for sub get_leading_string
5552
5553     my @leading_string_cache;
5554
5555     sub initialize_leading_string_cache {
5556         @leading_string_cache = ();
5557         return;
5558     }
5559
5560     sub get_leading_string {
5561
5562         # define the leading whitespace string for this line..
5563         my ( $self, $leading_whitespace_count, $group_level ) = @_;
5564
5565         # Handle case of zero whitespace, which includes multi-line quotes
5566         # (which may have a finite level; this prevents tab problems)
5567         if ( $leading_whitespace_count <= 0 ) {
5568             return EMPTY_STRING;
5569         }
5570
5571         # look for previous result
5572         elsif ( $leading_string_cache[$leading_whitespace_count] ) {
5573             return $leading_string_cache[$leading_whitespace_count];
5574         }
5575
5576         # must compute a string for this number of spaces
5577         my $leading_string;
5578
5579         # Handle simple case of no tabs
5580         my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
5581         my $rOpts_tabs           = $self->[_rOpts_tabs_];
5582         my $rOpts_entab_leading_whitespace =
5583           $self->[_rOpts_entab_leading_whitespace_];
5584
5585         if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
5586             || $rOpts_indent_columns <= 0 )
5587         {
5588             $leading_string = ( SPACE x $leading_whitespace_count );
5589         }
5590
5591         # Handle entab option
5592         elsif ($rOpts_entab_leading_whitespace) {
5593             my $space_count =
5594               $leading_whitespace_count % $rOpts_entab_leading_whitespace;
5595             my $tab_count = int(
5596                 $leading_whitespace_count / $rOpts_entab_leading_whitespace );
5597             $leading_string = "\t" x $tab_count . SPACE x $space_count;
5598         }
5599
5600         # Handle option of one tab per level
5601         else {
5602             $leading_string = ( "\t" x $group_level );
5603             my $space_count =
5604               $leading_whitespace_count - $group_level * $rOpts_indent_columns;
5605
5606             # shouldn't happen:
5607             if ( $space_count < 0 ) {
5608                 DEBUG_TABS
5609                   && warning(
5610 "Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n"
5611                   );
5612
5613                 # -- skip entabbing
5614                 $leading_string = ( SPACE x $leading_whitespace_count );
5615             }
5616             else {
5617                 $leading_string .= ( SPACE x $space_count );
5618             }
5619         }
5620         $leading_string_cache[$leading_whitespace_count] = $leading_string;
5621         return $leading_string;
5622     }
5623 } ## end get_leading_string
5624
5625 ##########################
5626 # CODE SECTION 10: Summary
5627 ##########################
5628
5629 sub report_anything_unusual {
5630     my $self = shift;
5631
5632     my $outdented_line_count = $self->[_outdented_line_count_];
5633     if ( $outdented_line_count > 0 ) {
5634         write_logfile_entry(
5635             "$outdented_line_count long lines were outdented:\n");
5636         my $first_outdented_line_at = $self->[_first_outdented_line_at_];
5637         write_logfile_entry(
5638             "  First at output line $first_outdented_line_at\n");
5639
5640         if ( $outdented_line_count > 1 ) {
5641             my $last_outdented_line_at = $self->[_last_outdented_line_at_];
5642             write_logfile_entry(
5643                 "   Last at output line $last_outdented_line_at\n");
5644         }
5645         write_logfile_entry(
5646             "  use -noll to prevent outdenting, -l=n to increase line length\n"
5647         );
5648         write_logfile_entry("\n");
5649     }
5650     return;
5651 }
5652 1;