lib/Perl/Tidy/VerticalAligner.pm

   1 package Perl::Tidy::VerticalAligner;
   2 use strict;
   3 use warnings;
   4 use Carp;
   5 use English qw( -no_match_vars );
   6 our $VERSION = '20221112';
   7 use Perl::Tidy::VerticalAligner::Alignment;
   8 use Perl::Tidy::VerticalAligner::Line;
   9
  10 use constant DEVEL_MODE   => 0;
  11 use constant EMPTY_STRING => q{};
  12 use constant SPACE        => q{ };
  13
  14 # The Perl::Tidy::VerticalAligner package collects output lines and
  15 # attempts to line up certain common tokens, such as => and #, which are
  16 # identified by the calling routine.
  17 #
  18 # Usage:
  19 #   - Initiate an object with a call to new().
  20 #   - Write lines one-by-one with calls to valign_input().
  21 #   - Make a final call to flush() to empty the pipeline.
  22 #
  23 # The sub valign_input collects lines into groups.  When a group reaches
  24 # the maximum possible size it is processed for alignment and output.
  25 # The maximum group size is reached whenever there is a change in indentation
  26 # level, a blank line, a block comment, or an external flush call.  The calling
  27 # routine may also force a break in alignment at any time.
  28 #
  29 # If the calling routine needs to interrupt the output and send other text to
  30 # the output, it must first call flush() to empty the output pipeline.  This
  31 # might occur for example if a block of pod text needs to be sent to the output
  32 # between blocks of code.
  33
  34 # It is essential that a final call to flush() be made. Otherwise some
  35 # final lines of text will be lost.
  36
  37 # Index...
  38 # CODE SECTION 1: Preliminary code, global definitions and sub new
  39 #                 sub new
  40 # CODE SECTION 2: Some Basic Utilities
  41 # CODE SECTION 3: Code to accept input and form groups
  42 #                 sub valign_input
  43 # CODE SECTION 4: Code to process comment lines
  44 #                 sub _flush_comment_lines
  45 # CODE SECTION 5: Code to process groups of code lines
  46 #                 sub _flush_group_lines
  47 # CODE SECTION 6: Output Step A
  48 #                 sub valign_output_step_A
  49 # CODE SECTION 7: Output Step B
  50 #                 sub valign_output_step_B
  51 # CODE SECTION 8: Output Step C
  52 #                 sub valign_output_step_C
  53 # CODE SECTION 9: Output Step D
  54 #                 sub valign_output_step_D
  55 # CODE SECTION 10: Summary
  56 #                 sub report_anything_unusual
  57
  58 ##################################################################
  59 # CODE SECTION 1: Preliminary code, global definitions and sub new
  60 ##################################################################
  61
  62 sub AUTOLOAD {
  63
  64     # Catch any undefined sub calls so that we are sure to get
  65     # some diagnostic information.  This sub should never be called
  66     # except for a programming error.
  67     our $AUTOLOAD;
  68     return if ( $AUTOLOAD =~ /\bDESTROY$/ );
  69     my ( $pkg, $fname, $lno ) = caller();
  70     my $my_package = __PACKAGE__;
  71     print STDERR <<EOM;
  72 ======================================================================
  73 Error detected in package '$my_package', version $VERSION
  74 Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
  75 Called from package: '$pkg'
  76 Called from File '$fname'  at line '$lno'
  77 This error is probably due to a recent programming change
  78 ======================================================================
  79 EOM
  80     exit 1;
  81 }
  82
  83 sub DESTROY {
  84
  85     # required to avoid call to AUTOLOAD in some versions of perl
  86 }
  87
  88 sub Die {
  89     my ($msg) = @_;
  90     Perl::Tidy::Die($msg);
  91     croak "unexpected return from Perl::Tidy::Die";
  92 }
  93
  94 sub Fault {
  95     my ($msg) = @_;
  96
  97     # This routine is called for errors that really should not occur
  98     # except if there has been a bug introduced by a recent program change.
  99     # Please add comments at calls to Fault to explain why the call
 100     # should not occur, and where to look to fix it.
 101     my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
 102     my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
 103     my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
 104     my $input_stream_name = get_input_stream_name();
 105
 106     Die(<<EOM);
 107 ==============================================================================
 108 While operating on input stream with name: '$input_stream_name'
 109 A fault was detected at line $line0 of sub '$subroutine1'
 110 in file '$filename1'
 111 which was called from line $line1 of sub '$subroutine2'
 112 Message: '$msg'
 113 This is probably an error introduced by a recent programming change.
 114 Perl::Tidy::VerticalAligner.pm reports VERSION='$VERSION'.
 115 ==============================================================================
 116 EOM
 117
 118     # We shouldn't get here, but this return is to keep Perl-Critic from
 119     # complaining.
 120     return;
 121 }
 122
 123 my %valid_LINE_keys;
 124
 125 BEGIN {
 126
 127     # define valid keys in a line object
 128     my @q = qw(
 129       jmax
 130       rtokens
 131       rfields
 132       rfield_lengths
 133       rpatterns
 134       indentation
 135       leading_space_count
 136       outdent_long_lines
 137       list_type
 138       list_seqno
 139       is_hanging_side_comment
 140       maximum_line_length
 141       rvertical_tightness_flags
 142       is_terminal_ternary
 143       j_terminal_match
 144       end_group
 145       Kend
 146       ci_level
 147       level
 148       level_end
 149       imax_pair
 150
 151       ralignments
 152     );
 153
 154     @valid_LINE_keys{@q} = (1) x scalar(@q);
 155 }
 156
 157 BEGIN {
 158
 159     # Define the fixed indexes for variables in $self, which is an array
 160     # reference.  Note the convention of leading and trailing underscores to
 161     # keep them unique.
 162     # Do not combine with other BEGIN blocks (c101).
 163     my $i = 0;
 164     use constant {
 165         _file_writer_object_ => $i++,
 166         _logger_object_      => $i++,
 167         _diagnostics_object_ => $i++,
 168         _length_function_    => $i++,
 169
 170         _rOpts_                             => $i++,
 171         _rOpts_indent_columns_              => $i++,
 172         _rOpts_tabs_                        => $i++,
 173         _rOpts_entab_leading_whitespace_    => $i++,
 174         _rOpts_fixed_position_side_comment_ => $i++,
 175         _rOpts_minimum_space_to_comment_    => $i++,
 176         _rOpts_valign_code_                 => $i++,
 177         _rOpts_valign_block_comments_       => $i++,
 178         _rOpts_valign_side_comments_        => $i++,
 179
 180         _last_level_written_            => $i++,
 181         _last_side_comment_column_      => $i++,
 182         _last_side_comment_line_number_ => $i++,
 183         _last_side_comment_length_      => $i++,
 184         _last_side_comment_level_       => $i++,
 185         _outdented_line_count_          => $i++,
 186         _first_outdented_line_at_       => $i++,
 187         _last_outdented_line_at_        => $i++,
 188         _consecutive_block_comments_    => $i++,
 189
 190         _rgroup_lines_                => $i++,
 191         _group_level_                 => $i++,
 192         _group_type_                  => $i++,
 193         _group_maximum_line_length_   => $i++,
 194         _zero_count_                  => $i++,
 195         _last_leading_space_count_    => $i++,
 196         _comment_leading_space_count_ => $i++,
 197     };
 198
 199     # Debug flag. This is a relic from the original program development
 200     # looking for problems with tab characters.  Caution: this debug flag can
 201     # produce a lot of output It should be 0 except when debugging small
 202     # scripts.
 203
 204     use constant DEBUG_TABS => 0;
 205
 206     my $debug_warning = sub {
 207         print STDOUT "VALIGN_DEBUGGING with key $_[0]\n";
 208         return;
 209     };
 210
 211     DEBUG_TABS && $debug_warning->('TABS');
 212 }
 213
 214 # GLOBAL variables
 215 my (
 216
 217     %valign_control_hash,
 218     $valign_control_default,
 219
 220 );
 221
 222 sub check_options {
 223
 224     # This routine is called to check the user-supplied run parameters
 225     # and to configure the control hashes to them.
 226     my ($rOpts) = @_;
 227
 228     # All alignments are done by default
 229     %valign_control_hash    = ();
 230     $valign_control_default = 1;
 231
 232     # If -vil=s is entered without -vxl, assume -vxl='*'
 233     if (  !$rOpts->{'valign-exclusion-list'}
 234         && $rOpts->{'valign-inclusion-list'} )
 235     {
 236         $rOpts->{'valign-exclusion-list'} = '*';
 237     }
 238
 239     # See if the user wants to exclude any alignment types ...
 240     if ( $rOpts->{'valign-exclusion-list'} ) {
 241
 242         # The inclusion list is only relevant if there is an exclusion list
 243         if ( $rOpts->{'valign-inclusion-list'} ) {
 244             my @vil = split /\s+/, $rOpts->{'valign-inclusion-list'};
 245             @valign_control_hash{@vil} = (1) x scalar(@vil);
 246         }
 247
 248         # Note that the -vxl list is done after -vil, so -vxl has priority
 249         # in the event of duplicate entries.
 250         my @vxl = split /\s+/, $rOpts->{'valign-exclusion-list'};
 251         @valign_control_hash{@vxl} = (0) x scalar(@vxl);
 252
 253         # Optimization: revert to defaults if no exclusions.
 254         # This could happen with -vxl='  ' and any -vil list
 255         if ( !@vxl ) {
 256             %valign_control_hash = ();
 257         }
 258
 259         # '$valign_control_default' applies to types not in the hash:
 260         # - If a '*' was entered then set it to be that default type
 261         # - Otherwise, leave it set it to 1
 262         if ( defined( $valign_control_hash{'*'} ) ) {
 263             $valign_control_default = $valign_control_hash{'*'};
 264         }
 265
 266         # Side comments are controlled separately and must be removed
 267         # if given in a list.
 268         if (%valign_control_hash) {
 269             $valign_control_hash{'#'} = 1;
 270         }
 271     }
 272
 273     return;
 274 }
 275
 276 sub check_keys {
 277     my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
 278
 279     # Check the keys of a hash:
 280     # $rtest   = ref to hash to test
 281     # $rvalid  = ref to hash with valid keys
 282
 283     # $msg = a message to write in case of error
 284     # $exact_match defines the type of check:
 285     #     = false: test hash must not have unknown key
 286     #     = true:  test hash must have exactly same keys as known hash
 287     my @unknown_keys =
 288       grep { !exists $rvalid->{$_} } keys %{$rtest};
 289     my @missing_keys =
 290       grep { !exists $rtest->{$_} } keys %{$rvalid};
 291     my $error = @unknown_keys;
 292     if ($exact_match) { $error ||= @missing_keys }
 293     if ($error) {
 294         local $LIST_SEPARATOR = ')(';
 295         my @expected_keys = sort keys %{$rvalid};
 296         @unknown_keys = sort @unknown_keys;
 297         Fault(<<EOM);
 298 ------------------------------------------------------------------------
 299 Program error detected checking hash keys
 300 Message is: '$msg'
 301 Expected keys: (@expected_keys)
 302 Unknown key(s): (@unknown_keys)
 303 Missing key(s): (@missing_keys)
 304 ------------------------------------------------------------------------
 305 EOM
 306     }
 307     return;
 308 } ## end sub check_keys
 309
 310 sub new {
 311
 312     my ( $class, @args ) = @_;
 313
 314     my %defaults = (
 315         rOpts              => undef,
 316         file_writer_object => undef,
 317         logger_object      => undef,
 318         diagnostics_object => undef,
 319         length_function    => sub { return length( $_[0] ) },
 320     );
 321     my %args = ( %defaults, @args );
 322
 323     # Initialize other caches and buffers
 324     initialize_step_B_cache();
 325     initialize_valign_buffer();
 326     initialize_leading_string_cache();
 327     initialize_decode();
 328     set_logger_object( $args{logger_object} );
 329
 330     # Initialize all variables in $self.
 331     # To add an item to $self, first define a new constant index in the BEGIN
 332     # section.
 333     my $self = [];
 334
 335     # objects
 336     $self->[_file_writer_object_] = $args{file_writer_object};
 337     $self->[_logger_object_]      = $args{logger_object};
 338     $self->[_diagnostics_object_] = $args{diagnostics_object};
 339     $self->[_length_function_]    = $args{length_function};
 340
 341     # shortcuts to user options
 342     my $rOpts = $args{rOpts};
 343
 344     $self->[_rOpts_]                = $rOpts;
 345     $self->[_rOpts_indent_columns_] = $rOpts->{'indent-columns'};
 346     $self->[_rOpts_tabs_]           = $rOpts->{'tabs'};
 347     $self->[_rOpts_entab_leading_whitespace_] =
 348       $rOpts->{'entab-leading-whitespace'};
 349     $self->[_rOpts_fixed_position_side_comment_] =
 350       $rOpts->{'fixed-position-side-comment'};
 351     $self->[_rOpts_minimum_space_to_comment_] =
 352       $rOpts->{'minimum-space-to-comment'};
 353     $self->[_rOpts_valign_code_]           = $rOpts->{'valign-code'};
 354     $self->[_rOpts_valign_block_comments_] = $rOpts->{'valign-block-comments'};
 355     $self->[_rOpts_valign_side_comments_]  = $rOpts->{'valign-side-comments'};
 356
 357     # Batch of lines being collected
 358     $self->[_rgroup_lines_]                = [];
 359     $self->[_group_level_]                 = 0;
 360     $self->[_group_type_]                  = EMPTY_STRING;
 361     $self->[_group_maximum_line_length_]   = undef;
 362     $self->[_zero_count_]                  = 0;
 363     $self->[_comment_leading_space_count_] = 0;
 364     $self->[_last_leading_space_count_]    = 0;
 365
 366     # Memory of what has been processed
 367     $self->[_last_level_written_]            = -1;
 368     $self->[_last_side_comment_column_]      = 0;
 369     $self->[_last_side_comment_line_number_] = 0;
 370     $self->[_last_side_comment_length_]      = 0;
 371     $self->[_last_side_comment_level_]       = -1;
 372     $self->[_outdented_line_count_]          = 0;
 373     $self->[_first_outdented_line_at_]       = 0;
 374     $self->[_last_outdented_line_at_]        = 0;
 375     $self->[_consecutive_block_comments_]    = 0;
 376
 377     bless $self, $class;
 378     return $self;
 379 }
 380
 381 #################################
 382 # CODE SECTION 2: Basic Utilities
 383 #################################
 384
 385 sub flush {
 386
 387     # flush() is the external call to completely empty the pipeline.
 388     my ($self) = @_;
 389
 390     # push things out the pipeline...
 391
 392     # push out any current group lines
 393     $self->_flush_group_lines();
 394
 395     # then anything left in the cache of step_B
 396     $self->_flush_cache();
 397
 398     # then anything left in the buffer of step_C
 399     $self->dump_valign_buffer();
 400
 401     return;
 402 }
 403
 404 sub initialize_for_new_group {
 405     my ($self) = @_;
 406
 407     $self->[_rgroup_lines_]                = [];
 408     $self->[_group_type_]                  = EMPTY_STRING;
 409     $self->[_zero_count_]                  = 0;
 410     $self->[_comment_leading_space_count_] = 0;
 411     $self->[_last_leading_space_count_]    = 0;
 412     $self->[_group_maximum_line_length_]   = undef;
 413
 414     # Note that the value for _group_level_ is
 415     # handled separately in sub valign_input
 416     return;
 417 }
 418
 419 sub group_line_count {
 420     return +@{ $_[0]->[_rgroup_lines_] };
 421 }
 422
 423 # interface to Perl::Tidy::Diagnostics routines
 424 # For debugging; not currently used
 425 sub write_diagnostics {
 426     my ( $self, $msg ) = @_;
 427     my $diagnostics_object = $self->[_diagnostics_object_];
 428     if ($diagnostics_object) {
 429         $diagnostics_object->write_diagnostics($msg);
 430     }
 431     return;
 432 }
 433
 434 {    ## begin closure for logger routines
 435     my $logger_object;
 436
 437     # Called once per file to initialize the logger object
 438     sub set_logger_object {
 439         $logger_object = shift;
 440         return;
 441     }
 442
 443     sub get_logger_object {
 444         return $logger_object;
 445     }
 446
 447     sub get_input_stream_name {
 448         my $input_stream_name = EMPTY_STRING;
 449         if ($logger_object) {
 450             $input_stream_name = $logger_object->get_input_stream_name();
 451         }
 452         return $input_stream_name;
 453     }
 454
 455     sub warning {
 456         my ($msg) = @_;
 457         if ($logger_object) {
 458             $logger_object->warning($msg);
 459         }
 460         return;
 461     }
 462
 463     sub write_logfile_entry {
 464         my ($msg) = @_;
 465         if ($logger_object) {
 466             $logger_object->write_logfile_entry($msg);
 467         }
 468         return;
 469     }
 470 }
 471
 472 sub get_cached_line_count {
 473     my $self = shift;
 474     return $self->group_line_count() + ( get_cached_line_type() ? 1 : 0 );
 475 }
 476
 477 sub get_recoverable_spaces {
 478
 479     # return the number of spaces (+ means shift right, - means shift left)
 480     # that we would like to shift a group of lines with the same indentation
 481     # to get them to line up with their opening parens
 482     my $indentation = shift;
 483     return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
 484 }
 485
 486 ######################################################
 487 # CODE SECTION 3: Code to accept input and form groups
 488 ######################################################
 489
 490 sub push_group_line {
 491
 492     my ( $self, $new_line ) = @_;
 493     my $rgroup_lines = $self->[_rgroup_lines_];
 494     push @{$rgroup_lines}, $new_line;
 495     return;
 496 }
 497
 498 use constant DEBUG_VALIGN      => 0;
 499 use constant SC_LONG_LINE_DIFF => 12;
 500
 501 my %is_closing_token;
 502
 503 BEGIN {
 504     my @q = qw< } ) ] >;
 505     @is_closing_token{@q} = (1) x scalar(@q);
 506 }
 507
 508 #--------------------------------------------
 509 # VTFLAGS: Vertical tightness types and flags
 510 #--------------------------------------------
 511 # Vertical tightness is controlled by a 'type' and associated 'flags' for each
 512 # line.  These values are set by sub Formatter::set_vertical_tightness_flags.
 513 # These are defined as follows:
 514
 515 # Vertical Tightness Line Type Codes:
 516 # Type 0, no vertical tightness condition
 517 # Type 1, last token of this line is a non-block opening token
 518 # Type 2, first token of next line is a non-block closing
 519 # Type 3, isolated opening block brace
 520 # type 4, isolated closing block brace
 521
 522 # Opening token flag values are the vertical tightness flags
 523 # 0 do not join with next line
 524 # 1 just one join per line
 525 # 2 any number of joins
 526
 527 # Closing token flag values indicate spacing:
 528 # 0 = no space added before closing token
 529 # 1 = single space added before closing token
 530
 531 sub valign_input {
 532
 533     #---------------------------------------------------------------------
 534     # This is the front door of the vertical aligner.  On each call
 535     # we receive one line of specially marked text for vertical alignment.
 536     # We compare the line with the current group, and either:
 537     # - the line joins the current group if alignments match, or
 538     # - the current group is flushed and a new group is started otherwise
 539     #---------------------------------------------------------------------
 540     #
 541     # The key input parameters describing each line are:
 542     #     $level          = indentation level of this line
 543     #     $rfields        = ref to array of fields
 544     #     $rpatterns      = ref to array of patterns, one per field
 545     #     $rtokens        = ref to array of tokens starting fields 1,2,..
 546     #     $rfield_lengths = ref to array of field display widths
 547     #
 548     # Here is an example of what this package does.  In this example,
 549     # we are trying to line up both the '=>' and the '#'.
 550     #
 551     #         '18' => 'grave',    #   \`
 552     #         '19' => 'acute',    #   `'
 553     #         '20' => 'caron',    #   \v
 554     # <-tabs-><f1-><--field 2 ---><-f3->
 555     # |            |              |    |
 556     # |            |              |    |
 557     # col1        col2         col3 col4
 558     #
 559     # The calling routine has already broken the entire line into 3 fields as
 560     # indicated.  (So the work of identifying promising common tokens has
 561     # already been done).
 562     #
 563     # In this example, there will be 2 tokens being matched: '=>' and '#'.
 564     # They are the leading parts of fields 2 and 3, but we do need to know
 565     # what they are so that we can dump a group of lines when these tokens
 566     # change.
 567     #
 568     # The fields contain the actual characters of each field.  The patterns
 569     # are like the fields, but they contain mainly token types instead
 570     # of tokens, so they have fewer characters.  They are used to be
 571     # sure we are matching fields of similar type.
 572     #
 573     # In this example, there will be 4 column indexes being adjusted.  The
 574     # first one is always at zero.  The interior columns are at the start of
 575     # the matching tokens, and the last one tracks the maximum line length.
 576     #
 577     # Each time a new line comes in, it joins the current vertical
 578     # group if possible.  Otherwise it causes the current group to be flushed
 579     # and a new group is started.
 580     #
 581     # For each new group member, the column locations are increased, as
 582     # necessary, to make room for the new fields.  When the group is finally
 583     # output, these column numbers are used to compute the amount of spaces of
 584     # padding needed for each field.
 585     #
 586     # Programming note: the fields are assumed not to have any tab characters.
 587     # Tabs have been previously removed except for tabs in quoted strings and
 588     # side comments.  Tabs in these fields can mess up the column counting.
 589     # The log file warns the user if there are any such tabs.
 590
 591     my ( $self, $rcall_hash ) = @_;
 592
 593     # Unpack the call args. This form is significantly faster than getting them
 594     # one-by-one.
 595     my (
 596
 597         $Kend,
 598         $break_alignment_after,
 599         $break_alignment_before,
 600         $ci_level,
 601         $forget_side_comment,
 602         $indentation,
 603         $is_terminal_ternary,
 604         $level,
 605         $level_end,
 606         $list_seqno,
 607         $maximum_line_length,
 608         $outdent_long_lines,
 609         $rline_alignment,
 610         $rvertical_tightness_flags,
 611
 612       ) =
 613
 614       @{$rcall_hash}{
 615         qw(
 616           Kend
 617           break_alignment_after
 618           break_alignment_before
 619           ci_level
 620           forget_side_comment
 621           indentation
 622           is_terminal_ternary
 623           level
 624           level_end
 625           list_seqno
 626           maximum_line_length
 627           outdent_long_lines
 628           rline_alignment
 629           rvertical_tightness_flags
 630         )
 631       };
 632
 633     my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
 634       @{$rline_alignment};
 635
 636     # The index '$Kend' is a value which passed along with the line text to sub
 637     # 'write_code_line' for a convergence check.
 638
 639     # number of fields is $jmax
 640     # number of tokens between fields is $jmax-1
 641     my $jmax = @{$rfields} - 1;
 642
 643     my $leading_space_count =
 644       ref($indentation) ? $indentation->get_spaces() : $indentation;
 645
 646     # set outdented flag to be sure we either align within statements or
 647     # across statement boundaries, but not both.
 648     my $is_outdented =
 649       $self->[_last_leading_space_count_] > $leading_space_count;
 650     $self->[_last_leading_space_count_] = $leading_space_count;
 651
 652     # Identify a hanging side comment.  Hanging side comments have an empty
 653     # initial field.
 654     my $is_hanging_side_comment =
 655       ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
 656
 657     # Undo outdented flag for a hanging side comment
 658     $is_outdented = 0 if $is_hanging_side_comment;
 659
 660     # Identify a block comment.
 661     my $is_block_comment = $jmax == 0 && substr( $rfields->[0], 0, 1 ) eq '#';
 662
 663     # Block comment .. update count
 664     if ($is_block_comment) {
 665         $self->[_consecutive_block_comments_]++;
 666     }
 667
 668     # Not a block comment ..
 669     # Forget side comment column if we saw 2 or more block comments,
 670     # and reset the count
 671     else {
 672
 673         if ( $self->[_consecutive_block_comments_] > 1 ) {
 674             $self->forget_side_comment();
 675         }
 676         $self->[_consecutive_block_comments_] = 0;
 677     }
 678
 679     # Reset side comment location if we are entering a new block from level 0.
 680     # This is intended to keep them from drifting too far to the right.
 681     if ($forget_side_comment) {
 682         $self->forget_side_comment();
 683     }
 684
 685     my $is_balanced_line = $level_end == $level;
 686
 687     my $group_level               = $self->[_group_level_];
 688     my $group_maximum_line_length = $self->[_group_maximum_line_length_];
 689
 690     DEBUG_VALIGN && do {
 691         my $nlines = $self->group_line_count();
 692         print STDOUT
 693 "Entering valign_input: lines=$nlines new #fields= $jmax, leading_count=$leading_space_count, level=$level, group_level=$group_level, level_end=$level_end\n";
 694     };
 695
 696     # Validate cached line if necessary: If we can produce a container
 697     # with just 2 lines total by combining an existing cached opening
 698     # token with the closing token to follow, then we will mark both
 699     # cached flags as valid.
 700     my $cached_line_type = get_cached_line_type();
 701     if ($cached_line_type) {
 702         my $cached_line_opening_flag = get_cached_line_opening_flag();
 703         if ($rvertical_tightness_flags) {
 704             my $cached_seqno = get_cached_seqno();
 705             if (   $cached_seqno
 706                 && $rvertical_tightness_flags->{_vt_seqno}
 707                 && $rvertical_tightness_flags->{_vt_seqno} == $cached_seqno )
 708             {
 709
 710                 # Fix for b1187 and b1188: Normally this step is only done
 711                 # if the number of existing lines is 0 or 1.  But to prevent
 712                 # blinking, this range can be controlled by the caller.
 713                 # If zero values are given we fall back on the range 0 to 1.
 714                 my $line_count = $self->group_line_count();
 715                 my $min_lines  = $rvertical_tightness_flags->{_vt_min_lines};
 716                 my $max_lines  = $rvertical_tightness_flags->{_vt_max_lines};
 717                 $min_lines = 0 unless ($min_lines);
 718                 $max_lines = 1 unless ($max_lines);
 719                 if (   ( $line_count >= $min_lines )
 720                     && ( $line_count <= $max_lines ) )
 721                 {
 722                     $rvertical_tightness_flags->{_vt_valid_flag} ||= 1;
 723                     set_cached_line_valid(1);
 724                 }
 725             }
 726         }
 727
 728         # do not join an opening block brace (type 3, see VTFLAGS)
 729         # with an unbalanced line unless requested with a flag value of 2
 730         if (   $cached_line_type == 3
 731             && !$self->group_line_count()
 732             && $cached_line_opening_flag < 2
 733             && !$is_balanced_line )
 734         {
 735             set_cached_line_valid(0);
 736         }
 737     }
 738
 739     # shouldn't happen:
 740     if ( $level < 0 ) { $level = 0 }
 741
 742     # do not align code across indentation level changes
 743     # or changes in the maximum line length
 744     # or if vertical alignment is turned off
 745     if (
 746         $level != $group_level
 747         || (   $group_maximum_line_length
 748             && $maximum_line_length != $group_maximum_line_length )
 749         || $is_outdented
 750         || ( $is_block_comment && !$self->[_rOpts_valign_block_comments_] )
 751         || (   !$is_block_comment
 752             && !$self->[_rOpts_valign_side_comments_]
 753             && !$self->[_rOpts_valign_code_] )
 754       )
 755     {
 756
 757         $self->_flush_group_lines( $level - $group_level );
 758
 759         $group_level                         = $level;
 760         $self->[_group_level_]               = $group_level;
 761         $self->[_group_maximum_line_length_] = $maximum_line_length;
 762
 763         # Update leading spaces after the above flush because the leading space
 764         # count may have been changed if the -icp flag is in effect
 765         $leading_space_count =
 766           ref($indentation) ? $indentation->get_spaces() : $indentation;
 767     }
 768
 769     # --------------------------------------------------------------------
 770     # Collect outdentable block COMMENTS
 771     # --------------------------------------------------------------------
 772     if ( $self->[_group_type_] eq 'COMMENT' ) {
 773         if (   $is_block_comment
 774             && $outdent_long_lines
 775             && $leading_space_count == $self->[_comment_leading_space_count_] )
 776         {
 777
 778             # Note that for a comment group we are not storing a line
 779             # but rather just the text and its length.
 780             $self->push_group_line(
 781                 [ $rfields->[0], $rfield_lengths->[0], $Kend ] );
 782             return;
 783         }
 784         else {
 785             $self->_flush_group_lines();
 786         }
 787     }
 788
 789     my $rgroup_lines = $self->[_rgroup_lines_];
 790     if ( $break_alignment_before && @{$rgroup_lines} ) {
 791         $rgroup_lines->[-1]->{'end_group'} = 1;
 792     }
 793
 794     # --------------------------------------------------------------------
 795     # add dummy fields for terminal ternary
 796     # --------------------------------------------------------------------
 797     my $j_terminal_match;
 798
 799     if ( $is_terminal_ternary && @{$rgroup_lines} ) {
 800         $j_terminal_match =
 801           fix_terminal_ternary( $rgroup_lines->[-1], $rfields, $rtokens,
 802             $rpatterns, $rfield_lengths, $group_level, );
 803         $jmax = @{$rfields} - 1;
 804     }
 805
 806     # --------------------------------------------------------------------
 807     # add dummy fields for else statement
 808     # --------------------------------------------------------------------
 809
 810     # Note the trailing space after 'else' here. If there were no space between
 811     # the else and the next '{' then we would not be able to do vertical
 812     # alignment of the '{'.
 813     if (   $rfields->[0] eq 'else '
 814         && @{$rgroup_lines}
 815         && $is_balanced_line )
 816     {
 817
 818         $j_terminal_match =
 819           fix_terminal_else( $rgroup_lines->[-1], $rfields, $rtokens,
 820             $rpatterns, $rfield_lengths );
 821         $jmax = @{$rfields} - 1;
 822     }
 823
 824     # --------------------------------------------------------------------
 825     # Handle simple line of code with no fields to match.
 826     # --------------------------------------------------------------------
 827     if ( $jmax <= 0 ) {
 828         $self->[_zero_count_]++;
 829
 830         if ( @{$rgroup_lines}
 831             && !get_recoverable_spaces( $rgroup_lines->[0]->{'indentation'} ) )
 832         {
 833
 834             # flush the current group if it has some aligned columns..
 835             # or we haven't seen a comment lately
 836             if (   $rgroup_lines->[0]->{'jmax'} > 1
 837                 || $self->[_zero_count_] > 3 )
 838             {
 839                 $self->_flush_group_lines();
 840
 841                 # Update '$rgroup_lines' - it will become a ref to empty array.
 842                 # This allows avoiding a call to get_group_line_count below.
 843                 $rgroup_lines = $self->[_rgroup_lines_];
 844             }
 845         }
 846
 847         # start new COMMENT group if this comment may be outdented
 848         if (   $is_block_comment
 849             && $outdent_long_lines
 850             && !@{$rgroup_lines} )
 851         {
 852             $self->[_group_type_]                  = 'COMMENT';
 853             $self->[_comment_leading_space_count_] = $leading_space_count;
 854             $self->[_group_maximum_line_length_]   = $maximum_line_length;
 855             $self->push_group_line(
 856                 [ $rfields->[0], $rfield_lengths->[0], $Kend ] );
 857             return;
 858         }
 859
 860         # just write this line directly if no current group, no side comment,
 861         # and no space recovery is needed.
 862         if (   !@{$rgroup_lines}
 863             && !get_recoverable_spaces($indentation) )
 864         {
 865
 866             $self->valign_output_step_B(
 867                 {
 868                     leading_space_count       => $leading_space_count,
 869                     line                      => $rfields->[0],
 870                     line_length               => $rfield_lengths->[0],
 871                     side_comment_length       => 0,
 872                     outdent_long_lines        => $outdent_long_lines,
 873                     rvertical_tightness_flags => $rvertical_tightness_flags,
 874                     level                     => $level,
 875                     level_end                 => $level_end,
 876                     Kend                      => $Kend,
 877                     maximum_line_length       => $maximum_line_length,
 878                 }
 879             );
 880             return;
 881         }
 882     }
 883     else {
 884         $self->[_zero_count_] = 0;
 885     }
 886
 887     # --------------------------------------------------------------------
 888     # It simplifies things to create a zero length side comment
 889     # if none exists.
 890     # --------------------------------------------------------------------
 891     if ( ( $jmax == 0 ) || ( $rtokens->[ $jmax - 1 ] ne '#' ) ) {
 892         $jmax += 1;
 893         $rtokens->[ $jmax - 1 ]  = '#';
 894         $rfields->[$jmax]        = EMPTY_STRING;
 895         $rfield_lengths->[$jmax] = 0;
 896         $rpatterns->[$jmax]      = '#';
 897     }
 898
 899     # --------------------------------------------------------------------
 900     # create an object to hold this line
 901     # --------------------------------------------------------------------
 902
 903     # The hash keys below must match the list of keys in %valid_LINE_keys.
 904     # Values in this hash are accessed directly, except for 'ralignments',
 905     # rather than with get/set calls for efficiency.
 906     my $new_line = Perl::Tidy::VerticalAligner::Line->new(
 907         {
 908             jmax                      => $jmax,
 909             rtokens                   => $rtokens,
 910             rfields                   => $rfields,
 911             rpatterns                 => $rpatterns,
 912             rfield_lengths            => $rfield_lengths,
 913             indentation               => $indentation,
 914             leading_space_count       => $leading_space_count,
 915             outdent_long_lines        => $outdent_long_lines,
 916             list_seqno                => $list_seqno,
 917             list_type                 => EMPTY_STRING,
 918             is_hanging_side_comment   => $is_hanging_side_comment,
 919             rvertical_tightness_flags => $rvertical_tightness_flags,
 920             is_terminal_ternary       => $is_terminal_ternary,
 921             j_terminal_match          => $j_terminal_match,
 922             end_group                 => $break_alignment_after,
 923             Kend                      => $Kend,
 924             ci_level                  => $ci_level,
 925             level                     => $level,
 926             level_end                 => $level_end,
 927             imax_pair                 => -1,
 928             maximum_line_length       => $maximum_line_length,
 929
 930             ralignments => [],
 931         }
 932     );
 933
 934     DEVEL_MODE
 935       && check_keys( $new_line, \%valid_LINE_keys,
 936         "Checking line keys at line definition", 1 );
 937
 938     # --------------------------------------------------------------------
 939     # Decide if this is a simple list of items.
 940     # We use this to be less restrictive in deciding what to align.
 941     # --------------------------------------------------------------------
 942     decide_if_list($new_line) if ($list_seqno);
 943
 944     # --------------------------------------------------------------------
 945     # Append this line to the current group (or start new group)
 946     # --------------------------------------------------------------------
 947
 948     $self->push_group_line($new_line);
 949     $self->[_group_maximum_line_length_] = $maximum_line_length;
 950
 951     # output this group if it ends in a terminal else or ternary line
 952     if ( defined($j_terminal_match) ) {
 953         $self->_flush_group_lines();
 954     }
 955
 956     # Force break after jump to lower level
 957     elsif ($level_end < $level
 958         || $is_closing_token{ substr( $rfields->[0], 0, 1 ) } )
 959     {
 960         $self->_flush_group_lines(-1);
 961     }
 962
 963     # --------------------------------------------------------------------
 964     # Some old debugging stuff
 965     # --------------------------------------------------------------------
 966     DEBUG_VALIGN && do {
 967         print STDOUT "exiting valign_input fields:";
 968         dump_array( @{$rfields} );
 969         print STDOUT "exiting valign_input tokens:";
 970         dump_array( @{$rtokens} );
 971         print STDOUT "exiting valign_input patterns:";
 972         dump_array( @{$rpatterns} );
 973     };
 974
 975     return;
 976 }
 977
 978 sub join_hanging_comment {
 979
 980     # Add dummy fields to a hanging side comment to make it look
 981     # like the first line in its potential group.  This simplifies
 982     # the coding.
 983     my ( $new_line, $old_line ) = @_;
 984
 985     my $jmax = $new_line->{'jmax'};
 986
 987     # must be 2 fields
 988     return 0 unless $jmax == 1;
 989     my $rtokens = $new_line->{'rtokens'};
 990
 991     # the second field must be a comment
 992     return 0 unless $rtokens->[0] eq '#';
 993     my $rfields = $new_line->{'rfields'};
 994
 995     # the first field must be empty
 996     return 0 unless $rfields->[0] =~ /^\s*$/;
 997
 998     # the current line must have fewer fields
 999     my $maximum_field_index = $old_line->{'jmax'};
1000     return 0
1001       unless $maximum_field_index > $jmax;
1002
1003     # looks ok..
1004     my $rpatterns      = $new_line->{'rpatterns'};
1005     my $rfield_lengths = $new_line->{'rfield_lengths'};
1006
1007     $new_line->{'is_hanging_side_comment'} = 1;
1008
1009     $jmax                     = $maximum_field_index;
1010     $new_line->{'jmax'}       = $jmax;
1011     $rfields->[$jmax]         = $rfields->[1];
1012     $rfield_lengths->[$jmax]  = $rfield_lengths->[1];
1013     $rtokens->[ $jmax - 1 ]   = $rtokens->[0];
1014     $rpatterns->[ $jmax - 1 ] = $rpatterns->[0];
1015
1016     foreach my $j ( 1 .. $jmax - 1 ) {
1017         $rfields->[$j]         = EMPTY_STRING;
1018         $rfield_lengths->[$j]  = 0;
1019         $rtokens->[ $j - 1 ]   = EMPTY_STRING;
1020         $rpatterns->[ $j - 1 ] = EMPTY_STRING;
1021     }
1022     return 1;
1023 }
1024
1025 {    ## closure for sub decide_if_list
1026
1027     my %is_comma_token;
1028
1029     BEGIN {
1030
1031         my @q = qw( => );
1032         push @q, ',';
1033         @is_comma_token{@q} = (1) x scalar(@q);
1034     }
1035
1036     sub decide_if_list {
1037
1038         my $line = shift;
1039
1040         # A list will be taken to be a line with a forced break in which all
1041         # of the field separators are commas or comma-arrows (except for the
1042         # trailing #)
1043
1044         my $rtokens    = $line->{'rtokens'};
1045         my $test_token = $rtokens->[0];
1046         my ( $raw_tok, $lev, $tag, $tok_count ) =
1047           decode_alignment_token($test_token);
1048         if ( $is_comma_token{$raw_tok} ) {
1049             my $list_type = $test_token;
1050             my $jmax      = $line->{'jmax'};
1051
1052             foreach ( 1 .. $jmax - 2 ) {
1053                 ( $raw_tok, $lev, $tag, $tok_count ) =
1054                   decode_alignment_token( $rtokens->[$_] );
1055                 if ( !$is_comma_token{$raw_tok} ) {
1056                     $list_type = EMPTY_STRING;
1057                     last;
1058                 }
1059             }
1060             $line->{'list_type'} = $list_type;
1061         }
1062         return;
1063     }
1064 }
1065
1066 sub fix_terminal_ternary {
1067
1068     # Add empty fields as necessary to align a ternary term
1069     # like this:
1070     #
1071     #  my $leapyear =
1072     #      $year % 4   ? 0
1073     #    : $year % 100 ? 1
1074     #    : $year % 400 ? 0
1075     #    :               1;
1076     #
1077     # returns the index of the terminal question token, if any
1078
1079     my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths,
1080         $group_level )
1081       = @_;
1082
1083     return unless ($old_line);
1084     use constant EXPLAIN_TERNARY => 0;
1085
1086     if (%valign_control_hash) {
1087         my $align_ok = $valign_control_hash{'?'};
1088         $align_ok = $valign_control_default unless defined($align_ok);
1089         return unless ($align_ok);
1090     }
1091
1092     my $jmax        = @{$rfields} - 1;
1093     my $rfields_old = $old_line->{'rfields'};
1094
1095     my $rpatterns_old       = $old_line->{'rpatterns'};
1096     my $rtokens_old         = $old_line->{'rtokens'};
1097     my $maximum_field_index = $old_line->{'jmax'};
1098
1099     # look for the question mark after the :
1100     my ($jquestion);
1101     my $depth_question;
1102     my $pad        = EMPTY_STRING;
1103     my $pad_length = 0;
1104     foreach my $j ( 0 .. $maximum_field_index - 1 ) {
1105         my $tok = $rtokens_old->[$j];
1106         my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
1107         if ( $raw_tok eq '?' ) {
1108             $depth_question = $lev;
1109
1110             # depth must be correct
1111             next unless ( $depth_question eq $group_level );
1112
1113             $jquestion = $j;
1114             if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
1115                 $pad_length = length($1);
1116                 $pad        = SPACE x $pad_length;
1117             }
1118             else {
1119                 return;    # shouldn't happen
1120             }
1121             last;
1122         }
1123     }
1124     return unless ( defined($jquestion) );    # shouldn't happen
1125
1126     # Now splice the tokens and patterns of the previous line
1127     # into the else line to insure a match.  Add empty fields
1128     # as necessary.
1129     my $jadd = $jquestion;
1130
1131     # Work on copies of the actual arrays in case we have
1132     # to return due to an error
1133     my @fields        = @{$rfields};
1134     my @patterns      = @{$rpatterns};
1135     my @tokens        = @{$rtokens};
1136     my @field_lengths = @{$rfield_lengths};
1137
1138     EXPLAIN_TERNARY && do {
1139         local $LIST_SEPARATOR = '><';
1140         print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n";
1141         print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n";
1142         print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
1143         print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n";
1144         print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n";
1145         print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
1146     };
1147
1148     # handle cases of leading colon on this line
1149     if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
1150
1151         my ( $colon, $therest ) = ( $1, $2 );
1152
1153         # Handle sub-case of first field with leading colon plus additional code
1154         # This is the usual situation as at the '1' below:
1155         #  ...
1156         #  : $year % 400 ? 0
1157         #  :               1;
1158         if ($therest) {
1159
1160             # Split the first field after the leading colon and insert padding.
1161             # Note that this padding will remain even if the terminal value goes
1162             # out on a separate line.  This does not seem to look to bad, so no
1163             # mechanism has been included to undo it.
1164             my $field1        = shift @fields;
1165             my $field_length1 = shift @field_lengths;
1166             my $len_colon     = length($colon);
1167             unshift @fields, ( $colon, $pad . $therest );
1168             unshift @field_lengths,
1169               ( $len_colon, $pad_length + $field_length1 - $len_colon );
1170
1171             # change the leading pattern from : to ?
1172             return unless ( $patterns[0] =~ s/^\:/?/ );
1173
1174             # install leading tokens and patterns of existing line
1175             unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
1176             unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
1177
1178             # insert appropriate number of empty fields
1179             splice( @fields,        1, 0, (EMPTY_STRING) x $jadd ) if $jadd;
1180             splice( @field_lengths, 1, 0, (0) x $jadd )            if $jadd;
1181         }
1182
1183         # handle sub-case of first field just equal to leading colon.
1184         # This can happen for example in the example below where
1185         # the leading '(' would create a new alignment token
1186         # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
1187         # :                        ( $mname = $name . '->' );
1188         else {
1189
1190             return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
1191
1192             # prepend a leading ? onto the second pattern
1193             $patterns[1] = "?b" . $patterns[1];
1194
1195             # pad the second field
1196             $fields[1]        = $pad . $fields[1];
1197             $field_lengths[1] = $pad_length + $field_lengths[1];
1198
1199             # install leading tokens and patterns of existing line, replacing
1200             # leading token and inserting appropriate number of empty fields
1201             splice( @tokens,   0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
1202             splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
1203             splice( @fields,        1, 0, (EMPTY_STRING) x $jadd ) if $jadd;
1204             splice( @field_lengths, 1, 0, (0) x $jadd )            if $jadd;
1205         }
1206     }
1207
1208     # Handle case of no leading colon on this line.  This will
1209     # be the case when -wba=':' is used.  For example,
1210     #  $year % 400 ? 0 :
1211     #                1;
1212     else {
1213
1214         # install leading tokens and patterns of existing line
1215         $patterns[0] = '?' . 'b' . $patterns[0];
1216         unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
1217         unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
1218
1219         # insert appropriate number of empty fields
1220         $jadd             = $jquestion + 1;
1221         $fields[0]        = $pad . $fields[0];
1222         $field_lengths[0] = $pad_length + $field_lengths[0];
1223         splice( @fields,        0, 0, (EMPTY_STRING) x $jadd ) if $jadd;
1224         splice( @field_lengths, 0, 0, (0) x $jadd )            if $jadd;
1225     }
1226
1227     EXPLAIN_TERNARY && do {
1228         local $LIST_SEPARATOR = '><';
1229         print STDOUT "MODIFIED TOKENS=<@tokens>\n";
1230         print STDOUT "MODIFIED PATTERNS=<@patterns>\n";
1231         print STDOUT "MODIFIED FIELDS=<@fields>\n";
1232     };
1233
1234     # all ok .. update the arrays
1235     @{$rfields}        = @fields;
1236     @{$rtokens}        = @tokens;
1237     @{$rpatterns}      = @patterns;
1238     @{$rfield_lengths} = @field_lengths;
1239
1240     # force a flush after this line
1241     return $jquestion;
1242 }
1243
1244 sub fix_terminal_else {
1245
1246     # Add empty fields as necessary to align a balanced terminal
1247     # else block to a previous if/elsif/unless block,
1248     # like this:
1249     #
1250     #  if   ( 1 || $x ) { print "ok 13\n"; }
1251     #  else             { print "not ok 13\n"; }
1252     #
1253     # returns a positive value if the else block should be indented
1254     #
1255     my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths ) = @_;
1256
1257     return unless ($old_line);
1258     my $jmax = @{$rfields} - 1;
1259     return unless ( $jmax > 0 );
1260
1261     if (%valign_control_hash) {
1262         my $align_ok = $valign_control_hash{'{'};
1263         $align_ok = $valign_control_default unless defined($align_ok);
1264         return unless ($align_ok);
1265     }
1266
1267     # check for balanced else block following if/elsif/unless
1268     my $rfields_old = $old_line->{'rfields'};
1269
1270     # TBD: add handling for 'case'
1271     return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
1272
1273     # look for the opening brace after the else, and extract the depth
1274     my $tok_brace = $rtokens->[0];
1275     my $depth_brace;
1276     if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
1277
1278     # probably:  "else # side_comment"
1279     else { return }
1280
1281     my $rpatterns_old       = $old_line->{'rpatterns'};
1282     my $rtokens_old         = $old_line->{'rtokens'};
1283     my $maximum_field_index = $old_line->{'jmax'};
1284
1285     # be sure the previous if/elsif is followed by an opening paren
1286     my $jparen    = 0;
1287     my $tok_paren = '(' . $depth_brace;
1288     my $tok_test  = $rtokens_old->[$jparen];
1289     return unless ( $tok_test eq $tok_paren );    # shouldn't happen
1290
1291     # Now find the opening block brace
1292     my ($jbrace);
1293     foreach my $j ( 1 .. $maximum_field_index - 1 ) {
1294         my $tok = $rtokens_old->[$j];
1295         if ( $tok eq $tok_brace ) {
1296             $jbrace = $j;
1297             last;
1298         }
1299     }
1300     return unless ( defined($jbrace) );           # shouldn't happen
1301
1302     # Now splice the tokens and patterns of the previous line
1303     # into the else line to insure a match.  Add empty fields
1304     # as necessary.
1305     my $jadd = $jbrace - $jparen;
1306     splice( @{$rtokens},   0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
1307     splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
1308     splice( @{$rfields},        1, 0, (EMPTY_STRING) x $jadd );
1309     splice( @{$rfield_lengths}, 1, 0, (0) x $jadd );
1310
1311     # force a flush after this line if it does not follow a case
1312     if   ( $rfields_old->[0] =~ /^case\s*$/ ) { return }
1313     else                                      { return $jbrace }
1314 }
1315
1316 my %is_closing_block_type;
1317
1318 BEGIN {
1319     my @q = qw< } ] >;
1320     @is_closing_block_type{@q} = (1) x scalar(@q);
1321 }
1322
1323 # This is a flag for testing alignment by sub sweep_left_to_right only.
1324 # This test can help find problems with the alignment logic.
1325 # This flag should normally be zero.
1326 use constant TEST_SWEEP_ONLY => 0;
1327
1328 use constant EXPLAIN_CHECK_MATCH => 0;
1329
1330 sub check_match {
1331
1332     # See if the current line matches the current vertical alignment group.
1333
1334     my ( $self, $new_line, $base_line, $prev_line ) = @_;
1335
1336     # Given:
1337     #  $new_line  = the line being considered for group inclusion
1338     #  $base_line = the first line of the current group
1339     #  $prev_line = the line just before $new_line
1340
1341     # returns a flag and a value as follows:
1342     #    return (0, $imax_align)   if the line does not match
1343     #    return (1, $imax_align)   if the line matches but does not fit
1344     #    return (2, $imax_align)   if the line matches and fits
1345
1346     use constant NO_MATCH      => 0;
1347     use constant MATCH_NO_FIT  => 1;
1348     use constant MATCH_AND_FIT => 2;
1349
1350     my $return_value;
1351
1352     # Returns '$imax_align' which is the index of the maximum matching token.
1353     # It will be used in the subsequent left-to-right sweep to align as many
1354     # tokens as possible for lines which partially match.
1355     my $imax_align = -1;
1356
1357     # variable $GoToMsg explains reason for no match, for debugging
1358     my $GoToMsg = EMPTY_STRING;
1359
1360     my $jmax                = $new_line->{'jmax'};
1361     my $maximum_field_index = $base_line->{'jmax'};
1362
1363     my $jlimit = $jmax - 2;
1364     if ( $jmax > $maximum_field_index ) {
1365         $jlimit = $maximum_field_index - 2;
1366     }
1367
1368     if ( $new_line->{'is_hanging_side_comment'} ) {
1369
1370         # HSC's can join the group if they fit
1371     }
1372
1373     # Everything else
1374     else {
1375
1376         # A group with hanging side comments ends with the first non hanging
1377         # side comment.
1378         if ( $base_line->{'is_hanging_side_comment'} ) {
1379             $GoToMsg      = "end of hanging side comments";
1380             $return_value = NO_MATCH;
1381         }
1382         else {
1383
1384             # The number of tokens that this line shares with the previous
1385             # line has been stored with the previous line.  This value was
1386             # calculated and stored by sub 'match_line_pair'.
1387             $imax_align = $prev_line->{'imax_pair'};
1388
1389             if ( $imax_align != $jlimit ) {
1390                 $GoToMsg = "Not all tokens match: $imax_align != $jlimit\n";
1391                 $return_value = NO_MATCH;
1392             }
1393         }
1394     }
1395
1396     if ( !defined($return_value) ) {
1397
1398         # The tokens match, but the lines must have identical number of
1399         # tokens to join the group.
1400         if ( $maximum_field_index != $jmax ) {
1401             $GoToMsg      = "token count differs";
1402             $return_value = NO_MATCH;
1403         }
1404
1405         # The tokens match. Now See if there is space for this line in the
1406         # current group.
1407         elsif ( $self->check_fit( $new_line, $base_line ) && !TEST_SWEEP_ONLY )
1408         {
1409
1410             $GoToMsg = "match and fit, imax_align=$imax_align, jmax=$jmax\n";
1411             $return_value = MATCH_AND_FIT;
1412             $imax_align   = $jlimit;
1413         }
1414         else {
1415             $GoToMsg = "match but no fit, imax_align=$imax_align, jmax=$jmax\n";
1416             $return_value = MATCH_NO_FIT;
1417             $imax_align   = $jlimit;
1418         }
1419     }
1420
1421     EXPLAIN_CHECK_MATCH
1422       && print
1423 "returning $return_value because $GoToMsg, max match index =i $imax_align, jmax=$jmax\n";
1424
1425     return ( $return_value, $imax_align );
1426 }
1427
1428 sub check_fit {
1429
1430     my ( $self, $new_line, $old_line ) = @_;
1431
1432     # The new line has alignments identical to the current group. Now we have
1433     # to fit the new line into the group without causing a field to exceed the
1434     # line length limit.
1435     #   return true if successful
1436     #   return false if not successful
1437
1438     my $jmax                = $new_line->{'jmax'};
1439     my $leading_space_count = $new_line->{'leading_space_count'};
1440     my $rfield_lengths      = $new_line->{'rfield_lengths'};
1441     my $padding_available   = $old_line->get_available_space_on_right();
1442     my $jmax_old            = $old_line->{'jmax'};
1443     my $rtokens_old         = $old_line->{'rtokens'};
1444
1445     # Safety check ... only lines with equal array sizes should arrive here
1446     # from sub check_match.  So if this error occurs, look at recent changes in
1447     # sub check_match.  It is only supposed to check the fit of lines with
1448     # identical numbers of alignment tokens.
1449     if ( $jmax_old ne $jmax ) {
1450
1451         warning(<<EOM);
1452 Program bug detected in Perl::Tidy::VerticalAligner sub check_fit
1453 unexpected difference in array lengths: $jmax != $jmax_old
1454 EOM
1455         return;
1456     }
1457
1458     # Save current columns in case this line does not fit.
1459     my @alignments = @{ $old_line->{'ralignments'} };
1460     foreach my $alignment (@alignments) {
1461         $alignment->save_column();
1462     }
1463
1464     my $is_hanging_side_comment = $new_line->{'is_hanging_side_comment'};
1465
1466     # Loop over all alignments ...
1467     my $maximum_field_index = $old_line->{'jmax'};
1468     for my $j ( 0 .. $jmax ) {
1469
1470         my $pad = $rfield_lengths->[$j] - $old_line->current_field_width($j);
1471
1472         if ( $j == 0 ) {
1473             $pad += $leading_space_count;
1474         }
1475
1476         # Keep going if this field does not need any space.
1477         next if ( $pad < 0 );
1478
1479         # Revert to the starting state if does not fit
1480         if ( $pad > $padding_available ) {
1481
1482             #----------------------------------------------
1483             # Line does not fit -- revert to starting state
1484             #----------------------------------------------
1485             foreach my $alignment (@alignments) {
1486                 $alignment->restore_column();
1487             }
1488             return;
1489         }
1490
1491         # make room for this field
1492         $old_line->increase_field_width( $j, $pad );
1493         $padding_available -= $pad;
1494     }
1495
1496     #-------------------------------------
1497     # The line fits, the match is accepted
1498     #-------------------------------------
1499     return 1;
1500
1501 }
1502
1503 sub install_new_alignments {
1504
1505     my ($new_line) = @_;
1506
1507     my $jmax           = $new_line->{'jmax'};
1508     my $rfield_lengths = $new_line->{'rfield_lengths'};
1509     my $col            = $new_line->{'leading_space_count'};
1510
1511     my @alignments;
1512     for my $j ( 0 .. $jmax ) {
1513         $col += $rfield_lengths->[$j];
1514
1515         # create initial alignments for the new group
1516         my $alignment =
1517           Perl::Tidy::VerticalAligner::Alignment->new( { column => $col } );
1518         push @alignments, $alignment;
1519     }
1520     $new_line->{'ralignments'} = \@alignments;
1521     return;
1522 }
1523
1524 sub copy_old_alignments {
1525     my ( $new_line, $old_line ) = @_;
1526     my @new_alignments = @{ $old_line->{'ralignments'} };
1527     $new_line->{'ralignments'} = \@new_alignments;
1528     return;
1529 }
1530
1531 sub dump_array {
1532
1533     # debug routine to dump array contents
1534     local $LIST_SEPARATOR = ')(';
1535     print STDOUT "(@_)\n";
1536     return;
1537 }
1538
1539 sub level_change {
1540
1541     # compute decrease in level when we remove $diff spaces from the
1542     # leading spaces
1543     my ( $self, $leading_space_count, $diff, $level ) = @_;
1544
1545     my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
1546     if ($rOpts_indent_columns) {
1547         my $olev =
1548           int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
1549         my $nlev = int( $leading_space_count / $rOpts_indent_columns );
1550         $level -= ( $olev - $nlev );
1551         if ( $level < 0 ) { $level = 0 }
1552     }
1553     return $level;
1554 }
1555
1556 ###############################################
1557 # CODE SECTION 4: Code to process comment lines
1558 ###############################################
1559
1560 sub _flush_comment_lines {
1561
1562     # Output a group consisting of COMMENT lines
1563
1564     my ($self) = @_;
1565     my $rgroup_lines = $self->[_rgroup_lines_];
1566     return unless ( @{$rgroup_lines} );
1567     my $group_level               = $self->[_group_level_];
1568     my $group_maximum_line_length = $self->[_group_maximum_line_length_];
1569     my $leading_space_count       = $self->[_comment_leading_space_count_];
1570     my $leading_string =
1571       $self->get_leading_string( $leading_space_count, $group_level );
1572
1573     # look for excessively long lines
1574     my $max_excess = 0;
1575     foreach my $item ( @{$rgroup_lines} ) {
1576         my ( $str, $str_len ) = @{$item};
1577         my $excess =
1578           $str_len + $leading_space_count - $group_maximum_line_length;
1579         if ( $excess > $max_excess ) {
1580             $max_excess = $excess;
1581         }
1582     }
1583
1584     # zero leading space count if any lines are too long
1585     if ( $max_excess > 0 ) {
1586         $leading_space_count -= $max_excess;
1587         if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
1588         my $file_writer_object = $self->[_file_writer_object_];
1589         my $last_outdented_line_at =
1590           $file_writer_object->get_output_line_number();
1591         my $nlines = @{$rgroup_lines};
1592         $self->[_last_outdented_line_at_] =
1593           $last_outdented_line_at + $nlines - 1;
1594         my $outdented_line_count = $self->[_outdented_line_count_];
1595         unless ($outdented_line_count) {
1596             $self->[_first_outdented_line_at_] = $last_outdented_line_at;
1597         }
1598         $outdented_line_count += $nlines;
1599         $self->[_outdented_line_count_] = $outdented_line_count;
1600     }
1601
1602     # write the lines
1603     my $outdent_long_lines = 0;
1604
1605     foreach my $item ( @{$rgroup_lines} ) {
1606         my ( $str, $str_len, $Kend ) = @{$item};
1607         $self->valign_output_step_B(
1608             {
1609                 leading_space_count       => $leading_space_count,
1610                 line                      => $str,
1611                 line_length               => $str_len,
1612                 side_comment_length       => 0,
1613                 outdent_long_lines        => $outdent_long_lines,
1614                 rvertical_tightness_flags => undef,
1615                 level                     => $group_level,
1616                 level_end                 => $group_level,
1617                 Kend                      => $Kend,
1618                 maximum_line_length       => $group_maximum_line_length,
1619             }
1620         );
1621     }
1622
1623     $self->initialize_for_new_group();
1624     return;
1625 }
1626
1627 ######################################################
1628 # CODE SECTION 5: Code to process groups of code lines
1629 ######################################################
1630
1631 sub _flush_group_lines {
1632
1633     # This is the vertical aligner internal flush, which leaves the cache
1634     # intact
1635     my ( $self, $level_jump ) = @_;
1636
1637     # $level_jump = $next_level-$group_level, if known
1638     #             = undef if not known
1639     # Note: only the sign of the jump is needed
1640
1641     my $rgroup_lines = $self->[_rgroup_lines_];
1642     return unless ( @{$rgroup_lines} );
1643     my $group_type  = $self->[_group_type_];
1644     my $group_level = $self->[_group_level_];
1645
1646     # Debug
1647     0 && do {
1648         my ( $a, $b, $c ) = caller();
1649         my $nlines = @{$rgroup_lines};
1650         print STDOUT
1651 "APPEND0: _flush_group_lines called from $a $b $c lines=$nlines, type=$group_type \n";
1652     };
1653
1654     #-------------------------------------------
1655     # Section 1: Handle a group of COMMENT lines
1656     #-------------------------------------------
1657     if ( $group_type eq 'COMMENT' ) {
1658         $self->_flush_comment_lines();
1659         return;
1660     }
1661
1662     #------------------------------------------------------------------------
1663     # Section 2: Handle line(s) of CODE.  Most of the actual work of vertical
1664     # aligning happens here in the following steps:
1665     #------------------------------------------------------------------------
1666
1667     # STEP 1: Remove most unmatched tokens. They block good alignments.
1668     my ( $max_lev_diff, $saw_side_comment ) =
1669       delete_unmatched_tokens( $rgroup_lines, $group_level );
1670
1671     # STEP 2: Sweep top to bottom, forming subgroups of lines with exactly
1672     # matching common alignments.  The indexes of these subgroups are in the
1673     # return variable.
1674     my $rgroups = $self->sweep_top_down( $rgroup_lines, $group_level );
1675
1676     # STEP 3: Sweep left to right through the lines, looking for leading
1677     # alignment tokens shared by groups.
1678     sweep_left_to_right( $rgroup_lines, $rgroups, $group_level )
1679       if ( @{$rgroups} > 1 );
1680
1681     # STEP 4: Move side comments to a common column if possible.
1682     if ($saw_side_comment) {
1683         $self->align_side_comments( $rgroup_lines, $rgroups );
1684     }
1685
1686     # STEP 5: For the -lp option, increase the indentation of lists
1687     # to the desired amount, but do not exceed the line length limit.
1688
1689     # We are allowed to shift a group of lines to the right if:
1690     #  (1) its level is greater than the level of the previous group, and
1691     #  (2) its level is greater than the level of the next line to be written.
1692
1693     my $extra_indent_ok;
1694     if ( $group_level > $self->[_last_level_written_] ) {
1695
1696         # Use the level jump to next line to come, if given
1697         if ( defined($level_jump) ) {
1698             $extra_indent_ok = $level_jump < 0;
1699         }
1700
1701         # Otherwise, assume the next line has the level of the end of last line.
1702         # This fixes case c008.
1703         else {
1704             my $level_end = $rgroup_lines->[-1]->{'level_end'};
1705             $extra_indent_ok = $group_level > $level_end;
1706         }
1707     }
1708
1709     my $extra_leading_spaces =
1710       $extra_indent_ok
1711       ? get_extra_leading_spaces( $rgroup_lines, $rgroups )
1712       : 0;
1713
1714     # STEP 6: Output the lines.
1715     # All lines in this group have the same leading spacing and maximum line
1716     # length
1717     my $group_leader_length       = $rgroup_lines->[0]->{'leading_space_count'};
1718     my $group_maximum_line_length = $rgroup_lines->[0]->{'maximum_line_length'};
1719
1720     foreach my $line ( @{$rgroup_lines} ) {
1721         $self->valign_output_step_A(
1722             {
1723                 line                 => $line,
1724                 min_ci_gap           => 0,
1725                 do_not_align         => 0,
1726                 group_leader_length  => $group_leader_length,
1727                 extra_leading_spaces => $extra_leading_spaces,
1728                 level                => $group_level,
1729                 maximum_line_length  => $group_maximum_line_length,
1730             }
1731         );
1732     }
1733
1734     # Let the formatter know that this object has been processed and any
1735     # recoverable spaces have been handled.  This is needed for setting the
1736     # closing paren location in -lp mode.
1737     my $object = $rgroup_lines->[0]->{'indentation'};
1738     if ( ref($object) ) { $object->set_recoverable_spaces(0) }
1739
1740     $self->initialize_for_new_group();
1741     return;
1742 }
1743
1744 {    ## closure for sub sweep_top_down
1745
1746     my $rall_lines;         # all of the lines
1747     my $grp_level;          # level of all lines
1748     my $rgroups;            # describes the partition of lines we will make here
1749     my $group_line_count;   # number of lines in current partition
1750
1751     BEGIN { $rgroups = [] }
1752
1753     sub initialize_for_new_rgroup {
1754         $group_line_count = 0;
1755         return;
1756     }
1757
1758     sub add_to_rgroup {
1759
1760         my ($jend) = @_;
1761         my $rline = $rall_lines->[$jend];
1762
1763         my $jbeg = $jend;
1764         if ( $group_line_count == 0 ) {
1765             install_new_alignments($rline);
1766         }
1767         else {
1768             my $rvals = pop @{$rgroups};
1769             $jbeg = $rvals->[0];
1770             copy_old_alignments( $rline, $rall_lines->[$jbeg] );
1771         }
1772         push @{$rgroups}, [ $jbeg, $jend, undef ];
1773         $group_line_count++;
1774         return;
1775     }
1776
1777     sub get_rgroup_jrange {
1778
1779         return unless @{$rgroups};
1780         return unless ( $group_line_count > 0 );
1781         my ( $jbeg, $jend ) = @{ $rgroups->[-1] };
1782         return ( $jbeg, $jend );
1783     }
1784
1785     sub end_rgroup {
1786
1787         my ($imax_align) = @_;
1788         return unless @{$rgroups};
1789         return unless ( $group_line_count > 0 );
1790
1791         my ( $jbeg, $jend ) = @{ pop @{$rgroups} };
1792         push @{$rgroups}, [ $jbeg, $jend, $imax_align ];
1793
1794         # Undo some alignments of poor two-line combinations.
1795         # We had to wait until now to know the line count.
1796         if ( $jend - $jbeg == 1 ) {
1797             my $line_0 = $rall_lines->[$jbeg];
1798             my $line_1 = $rall_lines->[$jend];
1799
1800             my $imax_pair = $line_1->{'imax_pair'};
1801             if ( $imax_pair > $imax_align ) { $imax_align = $imax_pair }
1802
1803             ## flag for possible future use:
1804             ## my $is_isolated_pair = $imax_pair < 0
1805             ##  && ( $jbeg == 0
1806             ##    || $rall_lines->[ $jbeg - 1 ]->{'imax_pair'} < 0 );
1807
1808             my $imax_prev =
1809               $jbeg > 0 ? $rall_lines->[ $jbeg - 1 ]->{'imax_pair'} : -1;
1810
1811             my ( $is_marginal, $imax_align_fix ) =
1812               is_marginal_match( $line_0, $line_1, $grp_level, $imax_align,
1813                 $imax_prev );
1814             if ($is_marginal) {
1815                 combine_fields( $line_0, $line_1, $imax_align_fix );
1816             }
1817         }
1818
1819         initialize_for_new_rgroup();
1820         return;
1821     }
1822
1823     sub block_penultimate_match {
1824
1825         # emergency reset to prevent sweep_left_to_right from trying to match a
1826         # failed terminal else match
1827         return unless @{$rgroups} > 1;
1828         $rgroups->[-2]->[2] = -1;
1829         return;
1830     }
1831
1832     sub sweep_top_down {
1833         my ( $self, $rlines, $group_level ) = @_;
1834
1835         # Partition the set of lines into final alignment subgroups
1836         # and store the alignments with the lines.
1837
1838         # The alignment subgroups we are making here are groups of consecutive
1839         # lines which have (1) identical alignment tokens and (2) do not
1840         # exceed the allowable maximum line length.  A later sweep from
1841         # left-to-right ('sweep_lr') will handle additional alignments.
1842
1843         # transfer args to closure variables
1844         $rall_lines = $rlines;
1845         $grp_level  = $group_level;
1846         $rgroups    = [];
1847         initialize_for_new_rgroup();
1848         return unless @{$rlines};    # shouldn't happen
1849
1850         # Unset the _end_group flag for the last line if it it set because it
1851         # is not needed and can causes problems for -lp formatting
1852         $rall_lines->[-1]->{'end_group'} = 0;
1853
1854         # Loop over all lines ...
1855         my $jline = -1;
1856         foreach my $new_line ( @{$rall_lines} ) {
1857             $jline++;
1858
1859             # Start a new subgroup if necessary
1860             if ( !$group_line_count ) {
1861                 add_to_rgroup($jline);
1862                 if ( $new_line->{'end_group'} ) {
1863                     end_rgroup(-1);
1864                 }
1865                 next;
1866             }
1867
1868             my $j_terminal_match = $new_line->{'j_terminal_match'};
1869             my ( $jbeg, $jend ) = get_rgroup_jrange();
1870             if ( !defined($jbeg) ) {
1871
1872                 # safety check, shouldn't happen
1873                 warning(<<EOM);
1874 Program bug detected in Perl::Tidy::VerticalAligner sub sweep_top_down
1875 undefined index for group line count $group_line_count
1876 EOM
1877                 $jbeg = $jline;
1878             }
1879             my $base_line = $rall_lines->[$jbeg];
1880
1881             # Initialize a global flag saying if the last line of the group
1882             # should match end of group and also terminate the group.  There
1883             # should be no returns between here and where the flag is handled
1884             # at the bottom.
1885             my $col_matching_terminal = 0;
1886             if ( defined($j_terminal_match) ) {
1887
1888                 # remember the column of the terminal ? or { to match with
1889                 $col_matching_terminal =
1890                   $base_line->get_column($j_terminal_match);
1891
1892                 # Ignore an undefined value as a defensive step; shouldn't
1893                 # normally happen.
1894                 $col_matching_terminal = 0
1895                   unless defined($col_matching_terminal);
1896             }
1897
1898             # -------------------------------------------------------------
1899             # Allow hanging side comment to join current group, if any.  The
1900             # only advantage is to keep the other tokens in the same group. For
1901             # example, this would make the '=' align here:
1902             #  $ax         = 1;           # side comment
1903             #                             # hanging side comment
1904             #  $boondoggle = 5;           # side comment
1905             #  $beetle     = 5;           # side comment
1906
1907             # here is another example..
1908
1909             #  _rtoc_name_count   => {},                   # hash to track ..
1910             #  _rpackage_stack    => [],                   # stack to check ..
1911             #                                              # name changes
1912             #  _rlast_level       => \$last_level,         # brace indentation
1913             #
1914             #
1915             # If this were not desired, the next step could be skipped.
1916             # -------------------------------------------------------------
1917             if ( $new_line->{'is_hanging_side_comment'} ) {
1918                 join_hanging_comment( $new_line, $base_line );
1919             }
1920
1921             # If this line has no matching tokens, then flush out the lines
1922             # BEFORE this line unless both it and the previous line have side
1923             # comments.  This prevents this line from pushing side comments out
1924             # to the right.
1925             elsif ( $new_line->{'jmax'} == 1 ) {
1926
1927                 # There are no matching tokens, so now check side comments.
1928                 # Programming note: accessing arrays with index -1 is
1929                 # risky in Perl, but we have verified there is at least one
1930                 # line in the group and that there is at least one field.
1931                 my $prev_comment =
1932                   $rall_lines->[ $jline - 1 ]->{'rfields'}->[-1];
1933                 my $side_comment = $new_line->{'rfields'}->[-1];
1934                 end_rgroup(-1) unless ( $side_comment && $prev_comment );
1935             }
1936
1937             # See if the new line matches and fits the current group,
1938             # if it still exists. Flush the current group if not.
1939             my $match_code;
1940             if ($group_line_count) {
1941                 ( $match_code, my $imax_align ) =
1942                   $self->check_match( $new_line, $base_line,
1943                     $rall_lines->[ $jline - 1 ] );
1944                 if ( $match_code != 2 ) { end_rgroup($imax_align) }
1945             }
1946
1947             # Store the new line
1948             add_to_rgroup($jline);
1949
1950             if ( defined($j_terminal_match) ) {
1951
1952                 # Decide if we should fix a terminal match. We can either:
1953                 # 1. fix it and prevent the sweep_lr from changing it, or
1954                 # 2. leave it alone and let sweep_lr try to fix it.
1955
1956                 # The current logic is to fix it if:
1957                 # -it has not joined to previous lines,
1958                 # -and either the previous subgroup has just 1 line, or
1959                 # -this line matched but did not fit (so sweep won't work)
1960                 my $fixit;
1961                 if ( $group_line_count == 1 ) {
1962                     $fixit ||= $match_code;
1963                     if ( !$fixit ) {
1964                         if ( @{$rgroups} > 1 ) {
1965                             my ( $jbegx, $jendx ) = @{ $rgroups->[-2] };
1966                             my $nlines = $jendx - $jbegx + 1;
1967                             $fixit ||= $nlines <= 1;
1968                         }
1969                     }
1970                 }
1971
1972                 if ($fixit) {
1973                     $base_line = $new_line;
1974                     my $col_now = $base_line->get_column($j_terminal_match);
1975
1976                     # Ignore an undefined value as a defensive step; shouldn't
1977                     # normally happen.
1978                     $col_now = 0 unless defined($col_now);
1979
1980                     my $pad = $col_matching_terminal - $col_now;
1981                     my $padding_available =
1982                       $base_line->get_available_space_on_right();
1983                     if ( $col_now && $pad > 0 && $pad <= $padding_available ) {
1984                         $base_line->increase_field_width( $j_terminal_match,
1985                             $pad );
1986                     }
1987
1988                     # do not let sweep_left_to_right change an isolated 'else'
1989                     if ( !$new_line->{'is_terminal_ternary'} ) {
1990                         block_penultimate_match();
1991                     }
1992                 }
1993                 end_rgroup(-1);
1994             }
1995
1996             # end the group if we know we cannot match next line.
1997             elsif ( $new_line->{'end_group'} ) {
1998                 end_rgroup(-1);
1999             }
2000         } ## end loop over lines
2001
2002         end_rgroup(-1);
2003         return ($rgroups);
2004     }
2005 }
2006
2007 sub two_line_pad {
2008
2009     my ( $line_m, $line, $imax_min ) = @_;
2010
2011     # Given:
2012     #  two isolated (list) lines
2013     #  imax_min = number of common alignment tokens
2014     # Return:
2015     #  $pad_max = maximum suggested pad distance
2016     #           = 0 if alignment not recommended
2017     # Note that this is only for two lines which do not have alignment tokens
2018     # in common with any other lines.  It is intended for lists, but it might
2019     # also be used for two non-list lines with a common leading '='.
2020
2021     # Allow alignment if the difference in the two unpadded line lengths
2022     # is not more than either line length.  The idea is to avoid
2023     # aligning lines with very different field lengths, like these two:
2024
2025     #   [
2026     #       'VARCHAR', DBI::SQL_VARCHAR, undef, "'", "'", undef, 0, 1,
2027     #       1, 0, 0, 0, undef, 0, 0
2028     #   ];
2029     my $rfield_lengths   = $line->{'rfield_lengths'};
2030     my $rfield_lengths_m = $line_m->{'rfield_lengths'};
2031
2032     # Safety check - shouldn't happen
2033     return 0
2034       unless $imax_min < @{$rfield_lengths} && $imax_min < @{$rfield_lengths_m};
2035
2036     my $lensum_m = 0;
2037     my $lensum   = 0;
2038     foreach my $i ( 0 .. $imax_min ) {
2039         $lensum_m += $rfield_lengths_m->[$i];
2040         $lensum   += $rfield_lengths->[$i];
2041     }
2042
2043     my ( $lenmin, $lenmax ) =
2044       $lensum >= $lensum_m ? ( $lensum_m, $lensum ) : ( $lensum, $lensum_m );
2045
2046     my $patterns_match;
2047     if ( $line_m->{'list_type'} && $line->{'list_type'} ) {
2048         $patterns_match = 1;
2049         my $rpatterns_m = $line_m->{'rpatterns'};
2050         my $rpatterns   = $line->{'rpatterns'};
2051         foreach my $i ( 0 .. $imax_min ) {
2052             my $pat   = $rpatterns->[$i];
2053             my $pat_m = $rpatterns_m->[$i];
2054             if ( $pat ne $pat_m ) { $patterns_match = 0; last }
2055         }
2056     }
2057
2058     my $pad_max = $lenmax;
2059     if ( !$patterns_match && $lenmax > 2 * $lenmin ) { $pad_max = 0 }
2060
2061     return $pad_max;
2062 }
2063
2064 sub sweep_left_to_right {
2065
2066     my ( $rlines, $rgroups, $group_level ) = @_;
2067
2068     # So far we have divided the lines into groups having an equal number of
2069     # identical alignments.  Here we are going to look for common leading
2070     # alignments between the different groups and align them when possible.
2071     # For example, the three lines below are in three groups because each line
2072     # has a different number of commas.  In this routine we will sweep from
2073     # left to right, aligning the leading commas as we go, but stopping if we
2074     # hit the line length limit.
2075
2076     #  my ( $num, $numi, $numj,  $xyza, $ka,   $xyzb, $kb, $aff, $error );
2077     #  my ( $i,   $j,    $error, $aff,  $asum, $avec );
2078     #  my ( $km,  $area, $varea );
2079
2080     # nothing to do if just one group
2081     my $ng_max = @{$rgroups} - 1;
2082     return unless ( $ng_max > 0 );
2083
2084     #---------------------------------------------------------------------
2085     # Step 1: Loop over groups to find all common leading alignment tokens
2086     #---------------------------------------------------------------------
2087
2088     my $line;
2089     my $rtokens;
2090     my $imax;     # index of maximum non-side-comment alignment token
2091     my $istop;    # an optional stopping index
2092     my $jbeg;     # starting line index
2093     my $jend;     # ending line index
2094
2095     my $line_m;
2096     my $rtokens_m;
2097     my $imax_m;
2098     my $istop_m;
2099     my $jbeg_m;
2100     my $jend_m;
2101
2102     my $istop_mm;
2103
2104     # Look at neighboring pairs of groups and form a simple list
2105     # of all common leading alignment tokens. Foreach such match we
2106     # store [$i, $ng], where
2107     #  $i = index of the token in the line (0,1,...)
2108     #  $ng is the second of the two groups with this common token
2109     my @icommon;
2110
2111     # Hash to hold the maximum alignment change for any group
2112     my %max_move;
2113
2114     # a small number of columns
2115     my $short_pad = 4;
2116
2117     my $ng = -1;
2118     foreach my $item ( @{$rgroups} ) {
2119         $ng++;
2120
2121         $istop_mm = $istop_m;
2122
2123         # save _m values of previous group
2124         $line_m    = $line;
2125         $rtokens_m = $rtokens;
2126         $imax_m    = $imax;
2127         $istop_m   = $istop;
2128         $jbeg_m    = $jbeg;
2129         $jend_m    = $jend;
2130
2131         # Get values for this group. Note that we just have to use values for
2132         # one of the lines of the group since all members have the same
2133         # alignments.
2134         ( $jbeg, $jend, $istop ) = @{$item};
2135
2136         $line    = $rlines->[$jbeg];
2137         $rtokens = $line->{'rtokens'};
2138         $imax    = $line->{'jmax'} - 2;
2139         $istop   = -1 unless ( defined($istop) );
2140         $istop   = $imax if ( $istop > $imax );
2141
2142         # Initialize on first group
2143         next if ( $ng == 0 );
2144
2145         # Use the minimum index limit of the two groups
2146         my $imax_min = $imax > $imax_m ? $imax_m : $imax;
2147
2148         # Also impose a limit if given.
2149         if ( $istop_m < $imax_min ) {
2150             $imax_min = $istop_m;
2151         }
2152
2153         # Special treatment of two one-line groups isolated from other lines,
2154         # unless they form a simple list or a terminal match.  Otherwise the
2155         # alignment can look strange in some cases.
2156         my $list_type = $rlines->[$jbeg]->{'list_type'};
2157         if (
2158                $jend == $jbeg
2159             && $jend_m == $jbeg_m
2160             && ( $ng == 1 || $istop_mm < 0 )
2161             && ( $ng == $ng_max || $istop < 0 )
2162             && !$line->{'j_terminal_match'}
2163
2164             # Only do this for imperfect matches. This is normally true except
2165             # when two perfect matches cannot form a group because the line
2166             # length limit would be exceeded. In that case we can still try
2167             # to match as many alignments as possible.
2168             && ( $imax != $imax_m || $istop_m != $imax_m )
2169           )
2170         {
2171
2172             # We will just align assignments and simple lists
2173             next unless ( $imax_min >= 0 );
2174             next
2175               unless ( $rtokens->[0] =~ /^=\d/
2176                 || $list_type );
2177
2178             # In this case we will limit padding to a short distance.  This
2179             # is a compromise to keep some vertical alignment but prevent large
2180             # gaps, which do not look good for just two lines.
2181             my $pad_max =
2182               two_line_pad( $rlines->[$jbeg], $rlines->[$jbeg_m], $imax_min );
2183             next unless ($pad_max);
2184             my $ng_m = $ng - 1;
2185             $max_move{"$ng_m"} = $pad_max;
2186             $max_move{"$ng"}   = $pad_max;
2187         }
2188
2189         # Loop to find all common leading tokens.
2190         if ( $imax_min >= 0 ) {
2191             foreach my $i ( 0 .. $imax_min ) {
2192                 my $tok   = $rtokens->[$i];
2193                 my $tok_m = $rtokens_m->[$i];
2194                 last if ( $tok ne $tok_m );
2195                 push @icommon, [ $i, $ng, $tok ];
2196             }
2197         }
2198     }
2199     return unless @icommon;
2200
2201     #----------------------------------------------------------
2202     # Step 2: Reorder and consolidate the list into a task list
2203     #----------------------------------------------------------
2204
2205     # We have to work first from lowest token index to highest, then by group,
2206     # sort our list first on token index then group number
2207     @icommon = sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @icommon;
2208
2209     # Make a task list of the form
2210     #   [$i, ng_beg, $ng_end, $tok], ..
2211     # where
2212     #   $i is the index of the token to be aligned
2213     #   $ng_beg..$ng_end is the group range for this action
2214     my @todo;
2215     my ( $i, $ng_end, $tok );
2216     foreach my $item (@icommon) {
2217         my $ng_last = $ng_end;
2218         my $i_last  = $i;
2219         ( $i, $ng_end, $tok ) = @{$item};
2220         my $ng_beg = $ng_end - 1;
2221         if ( defined($ng_last) && $ng_beg == $ng_last && $i == $i_last ) {
2222             my $var = pop(@todo);
2223             $ng_beg = $var->[1];
2224         }
2225         my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
2226         push @todo, [ $i, $ng_beg, $ng_end, $raw_tok, $lev ];
2227     }
2228
2229     #------------------------------
2230     # Step 3: Execute the task list
2231     #------------------------------
2232     do_left_to_right_sweep( $rlines, $rgroups, \@todo, \%max_move, $short_pad,
2233         $group_level );
2234     return;
2235 }
2236
2237 {    ## closure for sub do_left_to_right_sweep
2238
2239     my %is_good_alignment_token;
2240
2241     BEGIN {
2242
2243         # One of the most difficult aspects of vertical alignment is knowing
2244         # when not to align.  Alignment can go from looking very nice to very
2245         # bad when overdone.  In the sweep algorithm there are two special
2246         # cases where we may need to limit padding to a '$short_pad' distance
2247         # to avoid some very ugly formatting:
2248
2249         # 1. Two isolated lines with partial alignment
2250         # 2. A 'tail-wag-dog' situation, in which a single terminal
2251         #    line with partial alignment could cause a significant pad
2252         #    increase in many previous lines if allowed to join the alignment.
2253
2254         # For most alignment tokens, we will allow only a small pad to be
2255         # introduced (the hardwired $short_pad variable) . But for some 'good'
2256         # alignments we can be less restrictive.
2257
2258         # These are 'good' alignments, which are allowed more padding:
2259         my @q = qw(
2260           => = ? if unless or || {
2261         );
2262         push @q, ',';
2263         @is_good_alignment_token{@q} = (0) x scalar(@q);
2264
2265         # Promote a few of these to 'best', with essentially no pad limit:
2266         $is_good_alignment_token{'='}      = 1;
2267         $is_good_alignment_token{'if'}     = 1;
2268         $is_good_alignment_token{'unless'} = 1;
2269         $is_good_alignment_token{'=>'}     = 1
2270
2271           # Note the hash values are set so that:
2272           #         if ($is_good_alignment_token{$raw_tok}) => best
2273           # if defined ($is_good_alignment_token{$raw_tok}) => good or best
2274
2275     }
2276
2277     sub do_left_to_right_sweep {
2278         my ( $rlines, $rgroups, $rtodo, $rmax_move, $short_pad, $group_level )
2279           = @_;
2280
2281         # $blocking_level[$nj is the level at a match failure between groups
2282         # $ng-1 and $ng
2283         my @blocking_level;
2284         my $group_list_type = $rlines->[0]->{'list_type'};
2285
2286         my $move_to_common_column = sub {
2287
2288             # Move the alignment column of token $itok to $col_want for a
2289             # sequence of groups.
2290             my ( $ngb, $nge, $itok, $col_want, $raw_tok ) = @_;
2291             return unless ( defined($ngb) && $nge > $ngb );
2292             foreach my $ng ( $ngb .. $nge ) {
2293
2294                 my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
2295                 my $line  = $rlines->[$jbeg];
2296                 my $col   = $line->get_column($itok);
2297                 my $avail = $line->get_available_space_on_right();
2298                 my $move  = $col_want - $col;
2299                 if ( $move > 0 ) {
2300
2301                     # limit padding increase in isolated two lines
2302                     next
2303                       if ( defined( $rmax_move->{$ng} )
2304                         && $move > $rmax_move->{$ng}
2305                         && !$is_good_alignment_token{$raw_tok} );
2306
2307                     $line->increase_field_width( $itok, $move );
2308                 }
2309                 elsif ( $move < 0 ) {
2310
2311                     # spot to take special action on failure to move
2312                 }
2313             }
2314             return;
2315         };
2316
2317         foreach my $task ( @{$rtodo} ) {
2318             my ( $itok, $ng_beg, $ng_end, $raw_tok, $lev ) = @{$task};
2319
2320             # Nothing to do for a single group
2321             next unless ( $ng_end > $ng_beg );
2322
2323             my $ng_first;  # index of the first group of a continuous sequence
2324             my $col_want;  # the common alignment column of a sequence of groups
2325             my $col_limit; # maximum column before bumping into max line length
2326             my $line_count_ng_m = 0;
2327             my $jmax_m;
2328             my $it_stop_m;
2329
2330             # Loop over the groups
2331             # 'ix_' = index in the array of lines
2332             # 'ng_' = index in the array of groups
2333             # 'it_' = index in the array of tokens
2334             my $ix_min      = $rgroups->[$ng_beg]->[0];
2335             my $ix_max      = $rgroups->[$ng_end]->[1];
2336             my $lines_total = $ix_max - $ix_min + 1;
2337             foreach my $ng ( $ng_beg .. $ng_end ) {
2338                 my ( $ix_beg, $ix_end, $it_stop ) = @{ $rgroups->[$ng] };
2339                 my $line_count_ng = $ix_end - $ix_beg + 1;
2340
2341                 # Important: note that since all lines in a group have a common
2342                 # alignments object, we just have to work on one of the lines
2343                 # (the first line).  All of the rest will be changed
2344                 # automatically.
2345                 my $line = $rlines->[$ix_beg];
2346                 my $jmax = $line->{'jmax'};
2347
2348                 # the maximum space without exceeding the line length:
2349                 my $avail   = $line->get_available_space_on_right();
2350                 my $col     = $line->get_column($itok);
2351                 my $col_max = $col + $avail;
2352
2353                 # Initialize on first group
2354                 if ( !defined($col_want) ) {
2355                     $ng_first        = $ng;
2356                     $col_want        = $col;
2357                     $col_limit       = $col_max;
2358                     $line_count_ng_m = $line_count_ng;
2359                     $jmax_m          = $jmax;
2360                     $it_stop_m       = $it_stop;
2361                     next;
2362                 }
2363
2364                 # RULE: Throw a blocking flag upon encountering a token level
2365                 # different from the level of the first blocking token.  For
2366                 # example, in the following example, if the = matches get
2367                 # blocked between two groups as shown, then we want to start
2368                 # blocking matches at the commas, which are at deeper level, so
2369                 # that we do not get the big gaps shown here:
2370
2371                 #  my $unknown3 = pack( "v",          -2 );
2372                 #  my $unknown4 = pack( "v",          0x09 );
2373                 #  my $unknown5 = pack( "VVV",        0x06, 0x00, 0x00 );
2374                 #  my $num_bbd_blocks  = pack( "V",   $num_lists );
2375                 #  my $root_startblock = pack( "V",   $root_start );
2376                 #  my $unknown6        = pack( "VV",  0x00, 0x1000 );
2377
2378                 # On the other hand, it is okay to keep matching at the same
2379                 # level such as in a simple list of commas and/or fat commas.
2380
2381                 my $is_blocked = defined( $blocking_level[$ng] )
2382                   && $lev > $blocking_level[$ng];
2383
2384                 # TAIL-WAG-DOG RULE: prevent a 'tail-wag-dog' syndrom, meaning:
2385                 # Do not let one or two lines with a **different number of
2386                 # alignments** open up a big gap in a large block.  For
2387                 # example, we will prevent something like this, where the first
2388                 # line pries open the rest:
2389
2390             #  $worksheet->write( "B7", "http://www.perl.com", undef, $format );
2391             #  $worksheet->write( "C7", "",                    $format );
2392             #  $worksheet->write( "D7", "",                    $format );
2393             #  $worksheet->write( "D8", "",                    $format );
2394             #  $worksheet->write( "D8", "",                    $format );
2395
2396                 # We should exclude from consideration two groups which are
2397                 # effectively the same but separated because one does not
2398                 # fit in the maximum allowed line length.
2399                 my $is_same_group =
2400                   $jmax == $jmax_m && $it_stop_m == $jmax_m - 2;
2401
2402                 my $lines_above = $ix_beg - $ix_min;
2403                 my $lines_below = $lines_total - $lines_above;
2404
2405                 # Increase the tolerable gap for certain favorable factors
2406                 my $factor    = 1;
2407                 my $top_level = $lev == $group_level;
2408
2409                 # Align best top level alignment tokens like '=', 'if', ...
2410                 # A factor of 10 allows a gap of up to 40 spaces
2411                 if ( $top_level && $is_good_alignment_token{$raw_tok} ) {
2412                     $factor = 10;
2413                 }
2414
2415                 # Otherwise allow some minimal padding of good alignments
2416                 elsif (
2417
2418                     defined( $is_good_alignment_token{$raw_tok} )
2419
2420                     # We have to be careful if there are just 2 lines.  This
2421                     # two-line factor allows large gaps only for 2 lines which
2422                     # are simple lists with fewer items on the second line. It
2423                     # gives results similar to previous versions of perltidy.
2424                     && (   $lines_total > 2
2425                         || $group_list_type && $jmax < $jmax_m && $top_level )
2426                   )
2427                 {
2428                     $factor += 1;
2429                     if ($top_level) {
2430                         $factor += 1;
2431                     }
2432                 }
2433
2434                 my $is_big_gap;
2435                 if ( !$is_same_group ) {
2436                     $is_big_gap ||=
2437                       (      $lines_above == 1
2438                           || $lines_above == 2 && $lines_below >= 4 )
2439                       && $col_want > $col + $short_pad * $factor;
2440                     $is_big_gap ||=
2441                       (      $lines_below == 1
2442                           || $lines_below == 2 && $lines_above >= 4 )
2443                       && $col > $col_want + $short_pad * $factor;
2444                 }
2445
2446                 # if match is limited by gap size, stop aligning at this level
2447                 if ($is_big_gap) {
2448                     $blocking_level[$ng] = $lev - 1;
2449                 }
2450
2451                 # quit and restart if it cannot join this batch
2452                 if (   $col_want > $col_max
2453                     || $col > $col_limit
2454                     || $is_big_gap
2455                     || $is_blocked )
2456                 {
2457
2458                     # remember the level of the first blocking token
2459                     if ( !defined( $blocking_level[$ng] ) ) {
2460                         $blocking_level[$ng] = $lev;
2461                     }
2462
2463                     $move_to_common_column->(
2464                         $ng_first, $ng - 1, $itok, $col_want, $raw_tok
2465                     );
2466                     $ng_first        = $ng;
2467                     $col_want        = $col;
2468                     $col_limit       = $col_max;
2469                     $line_count_ng_m = $line_count_ng;
2470                     $jmax_m          = $jmax;
2471                     $it_stop_m       = $it_stop;
2472                     next;
2473                 }
2474
2475                 $line_count_ng_m += $line_count_ng;
2476
2477                 # update the common column and limit
2478                 if ( $col > $col_want )      { $col_want  = $col }
2479                 if ( $col_max < $col_limit ) { $col_limit = $col_max }
2480
2481             } ## end loop over groups
2482
2483             if ( $ng_end > $ng_first ) {
2484                 $move_to_common_column->(
2485                     $ng_first, $ng_end, $itok, $col_want, $raw_tok
2486                 );
2487             } ## end loop over groups for one task
2488         } ## end loop over tasks
2489
2490         return;
2491     }
2492 }
2493
2494 sub delete_selected_tokens {
2495
2496     my ( $line_obj, $ridel ) = @_;
2497
2498     # $line_obj    is the line to be modified
2499     # $ridel       is a ref to list of indexes to be deleted
2500
2501     # remove an unused alignment token(s) to improve alignment chances
2502
2503     return unless ( defined($line_obj) && defined($ridel) && @{$ridel} );
2504
2505     my $jmax_old           = $line_obj->{'jmax'};
2506     my $rfields_old        = $line_obj->{'rfields'};
2507     my $rfield_lengths_old = $line_obj->{'rfield_lengths'};
2508     my $rpatterns_old      = $line_obj->{'rpatterns'};
2509     my $rtokens_old        = $line_obj->{'rtokens'};
2510     my $j_terminal_match   = $line_obj->{'j_terminal_match'};
2511
2512     use constant EXPLAIN_DELETE_SELECTED => 0;
2513
2514     local $LIST_SEPARATOR = '> <';
2515     EXPLAIN_DELETE_SELECTED && print <<EOM;
2516 delete indexes: <@{$ridel}>
2517 old jmax: $jmax_old
2518 old tokens: <@{$rtokens_old}>
2519 old patterns: <@{$rpatterns_old}>
2520 old fields: <@{$rfields_old}>
2521 old field_lengths: <@{$rfield_lengths_old}>
2522 EOM
2523
2524     my $rfields_new        = [];
2525     my $rpatterns_new      = [];
2526     my $rtokens_new        = [];
2527     my $rfield_lengths_new = [];
2528
2529     # Convert deletion list to a hash to allow any order, multiple entries,
2530     # and avoid problems with index values out of range
2531     my %delete_me;
2532     @delete_me{ @{$ridel} } = (1) x scalar( @{$ridel} );
2533
2534     my $pattern_0      = $rpatterns_old->[0];
2535     my $field_0        = $rfields_old->[0];
2536     my $field_length_0 = $rfield_lengths_old->[0];
2537     push @{$rfields_new},        $field_0;
2538     push @{$rfield_lengths_new}, $field_length_0;
2539     push @{$rpatterns_new},      $pattern_0;
2540
2541     # Loop to either copy items or concatenate fields and patterns
2542     my $jmin_del;
2543     foreach my $j ( 0 .. $jmax_old - 1 ) {
2544         my $token        = $rtokens_old->[$j];
2545         my $field        = $rfields_old->[ $j + 1 ];
2546         my $field_length = $rfield_lengths_old->[ $j + 1 ];
2547         my $pattern      = $rpatterns_old->[ $j + 1 ];
2548         if ( !$delete_me{$j} ) {
2549             push @{$rtokens_new},        $token;
2550             push @{$rfields_new},        $field;
2551             push @{$rpatterns_new},      $pattern;
2552             push @{$rfield_lengths_new}, $field_length;
2553         }
2554         else {
2555             if ( !defined($jmin_del) ) { $jmin_del = $j }
2556             $rfields_new->[-1] .= $field;
2557             $rfield_lengths_new->[-1] += $field_length;
2558             $rpatterns_new->[-1] .= $pattern;
2559         }
2560     }
2561
2562     # ----- x ------ x ------ x ------
2563     #t      0        1        2        <- token indexing
2564     #f   0      1        2        3    <- field and pattern
2565
2566     my $jmax_new = @{$rfields_new} - 1;
2567     $line_obj->{'rtokens'}        = $rtokens_new;
2568     $line_obj->{'rpatterns'}      = $rpatterns_new;
2569     $line_obj->{'rfields'}        = $rfields_new;
2570     $line_obj->{'rfield_lengths'} = $rfield_lengths_new;
2571     $line_obj->{'jmax'}           = $jmax_new;
2572
2573     # The value of j_terminal_match will be incorrect if we delete tokens prior
2574     # to it. We will have to give up on aligning the terminal tokens if this
2575     # happens.
2576     if ( defined($j_terminal_match) && $jmin_del <= $j_terminal_match ) {
2577         $line_obj->{'j_terminal_match'} = undef;
2578     }
2579
2580     # update list type -
2581     if ( $line_obj->{'list_seqno'} ) {
2582
2583         ## This works, but for efficiency see if we need to make a change:
2584         ## decide_if_list($line_obj);
2585
2586         # An existing list will still be a list but with possibly different
2587         # leading token
2588         my $old_list_type = $line_obj->{'list_type'};
2589         my $new_list_type = EMPTY_STRING;
2590         if ( $rtokens_new->[0] =~ /^(=>|,)/ ) {
2591             $new_list_type = $rtokens_new->[0];
2592         }
2593         if ( !$old_list_type || $old_list_type ne $new_list_type ) {
2594             decide_if_list($line_obj);
2595         }
2596     }
2597
2598     EXPLAIN_DELETE_SELECTED && print <<EOM;
2599
2600 new jmax: $jmax_new
2601 new tokens: <@{$rtokens_new}>
2602 new patterns: <@{$rpatterns_new}>
2603 new fields: <@{$rfields_new}>
2604 EOM
2605     return;
2606 }
2607
2608 {    ## closure for sub decode_alignment_token
2609
2610     # This routine is called repeatedly for each token, so it needs to be
2611     # efficient.  We can speed things up by remembering the inputs and outputs
2612     # in a hash.
2613     my %decoded_token;
2614
2615     sub initialize_decode {
2616
2617         # We will re-initialize the hash for each file. Otherwise, there is
2618         # a danger that the hash can become arbitrarily large if a very large
2619         # number of files is processed at once.
2620         %decoded_token = ();
2621         return;
2622     }
2623
2624     sub decode_alignment_token {
2625
2626         # Unpack the values packed in an alignment token
2627         #
2628         # Usage:
2629         #        my ( $raw_tok, $lev, $tag, $tok_count ) =
2630         #          decode_alignment_token($token);
2631
2632         # Alignment tokens have a trailing decimal level and optional tag (for
2633         # commas):
2634         # For example, the first comma in the following line
2635         #     sub banner  { crlf; report( shift, '/', shift ); crlf }
2636         # is decorated as follows:
2637         #    ,2+report-6  => (tok,lev,tag) =qw( ,   2   +report-6)
2638
2639         # An optional token count may be appended with a leading dot.
2640         # Currently this is only done for '=' tokens but this could change.
2641         # For example, consider the following line:
2642         #   $nport   = $port = shift || $name;
2643         # The first '=' may either be '=0' or '=0.1' [level 0, first equals]
2644         # The second '=' will be '=0.2' [level 0, second equals]
2645         my ($tok) = @_;
2646
2647         if ( defined( $decoded_token{$tok} ) ) {
2648             return @{ $decoded_token{$tok} };
2649         }
2650
2651         my ( $raw_tok, $lev, $tag, $tok_count ) = ( $tok, 0, EMPTY_STRING, 1 );
2652         if ( $tok =~ /^(\D+)(\d+)([^\.]*)(\.(\d+))?$/ ) {
2653             $raw_tok   = $1;
2654             $lev       = $2;
2655             $tag       = $3 if ($3);
2656             $tok_count = $5 if ($5);
2657         }
2658         my @vals = ( $raw_tok, $lev, $tag, $tok_count );
2659         $decoded_token{$tok} = \@vals;
2660         return @vals;
2661     }
2662 }
2663
2664 {    ## closure for sub delete_unmatched_tokens
2665
2666     my %is_assignment;
2667     my %keep_after_deleted_assignment;
2668
2669     BEGIN {
2670         my @q;
2671
2672         @q = qw(
2673           = **= += *= &= <<= &&=
2674           -= /= |= >>= ||= //=
2675           .= %= ^=
2676           x=
2677         );
2678         @is_assignment{@q} = (1) x scalar(@q);
2679
2680         # These tokens may be kept following an = deletion
2681         @q = qw(
2682           if unless or ||
2683         );
2684         @keep_after_deleted_assignment{@q} = (1) x scalar(@q);
2685
2686     }
2687
2688     sub delete_unmatched_tokens {
2689         my ( $rlines, $group_level ) = @_;
2690
2691         # This is a important first step in vertical alignment in which
2692         # we remove as many obviously un-needed alignment tokens as possible.
2693         # This will prevent them from interfering with the final alignment.
2694
2695         # Returns:
2696         my $max_lev_diff     = 0;    # used to avoid a call to prune_tree
2697         my $saw_side_comment = 0;    # used to avoid a call for side comments
2698
2699         # Handle no lines -- shouldn't happen
2700         return unless @{$rlines};
2701
2702         # Handle a single line
2703         if ( @{$rlines} == 1 ) {
2704             my $line   = $rlines->[0];
2705             my $jmax   = $line->{'jmax'};
2706             my $length = $line->{'rfield_lengths'}->[$jmax];
2707             $saw_side_comment = $length > 0;
2708             return ( $max_lev_diff, $saw_side_comment );
2709         }
2710
2711         my $has_terminal_match = $rlines->[-1]->{'j_terminal_match'};
2712
2713         # ignore hanging side comments in these operations
2714         my @filtered   = grep { !$_->{'is_hanging_side_comment'} } @{$rlines};
2715         my $rnew_lines = \@filtered;
2716
2717         $saw_side_comment = @filtered != @{$rlines};
2718         $max_lev_diff     = 0;
2719
2720         # nothing to do if all lines were hanging side comments
2721         my $jmax = @{$rnew_lines} - 1;
2722         return ( $max_lev_diff, $saw_side_comment ) unless ( $jmax >= 0 );
2723
2724         my @equals_info;
2725         my @line_info;
2726
2727         #------------------------------------------------------------
2728         # Loop to create a hash of alignment token info for each line
2729         #------------------------------------------------------------
2730         my $rline_hashes = [];
2731         foreach my $line ( @{$rnew_lines} ) {
2732             my $rhash     = {};
2733             my $rtokens   = $line->{'rtokens'};
2734             my $rpatterns = $line->{'rpatterns'};
2735             my $i         = 0;
2736             my ( $i_eq, $tok_eq, $pat_eq );
2737             my ( $lev_min, $lev_max );
2738             foreach my $tok ( @{$rtokens} ) {
2739                 my ( $raw_tok, $lev, $tag, $tok_count ) =
2740                   decode_alignment_token($tok);
2741
2742                 if ( $tok ne '#' ) {
2743                     if ( !defined($lev_min) ) {
2744                         $lev_min = $lev;
2745                         $lev_max = $lev;
2746                     }
2747                     else {
2748                         if ( $lev < $lev_min ) { $lev_min = $lev }
2749                         if ( $lev > $lev_max ) { $lev_max = $lev }
2750                     }
2751                 }
2752                 else {
2753                     if ( !$saw_side_comment ) {
2754                         my $length = $line->{'rfield_lengths'}->[ $i + 1 ];
2755                         $saw_side_comment ||= $length;
2756                     }
2757                 }
2758
2759                 # Possible future upgrade: for multiple matches,
2760                 # record [$i1, $i2, ..] instead of $i
2761                 $rhash->{$tok} =
2762                   [ $i, undef, undef, $raw_tok, $lev, $tag, $tok_count ];
2763
2764                 # remember the first equals at line level
2765                 if ( !defined($i_eq) && $raw_tok eq '=' ) {
2766
2767                     if ( $lev eq $group_level ) {
2768                         $i_eq   = $i;
2769                         $tok_eq = $tok;
2770                         $pat_eq = $rpatterns->[$i];
2771                     }
2772                 }
2773                 $i++;
2774             }
2775             push @{$rline_hashes}, $rhash;
2776             push @equals_info,     [ $i_eq,    $tok_eq, $pat_eq ];
2777             push @line_info,       [ $lev_min, $lev_max ];
2778             if ( defined($lev_min) ) {
2779                 my $lev_diff = $lev_max - $lev_min;
2780                 if ( $lev_diff > $max_lev_diff ) { $max_lev_diff = $lev_diff }
2781             }
2782         }
2783
2784         #----------------------------------------------------
2785         # Loop to compare each line pair and remember matches
2786         #----------------------------------------------------
2787         my $rtok_hash = {};
2788         my $nr        = 0;
2789         foreach my $jl ( 0 .. $jmax - 1 ) {
2790             my $nl = $nr;
2791             $nr = 0;
2792             my $jr      = $jl + 1;
2793             my $rhash_l = $rline_hashes->[$jl];
2794             my $rhash_r = $rline_hashes->[$jr];
2795             foreach my $tok ( keys %{$rhash_l} ) {
2796                 if ( defined( $rhash_r->{$tok} ) ) {
2797                     my $il = $rhash_l->{$tok}->[0];
2798                     my $ir = $rhash_r->{$tok}->[0];
2799                     $rhash_l->{$tok}->[2] = $ir;
2800                     $rhash_r->{$tok}->[1] = $il;
2801                     if ( $tok ne '#' ) {
2802                         push @{ $rtok_hash->{$tok} }, ( $jl, $jr );
2803                         $nr++;
2804                     }
2805                 }
2806             }
2807
2808             # Set a line break if no matching tokens between these lines
2809             # (this is not strictly necessary now but does not hurt)
2810             if ( $nr == 0 && $nl > 0 ) {
2811                 $rnew_lines->[$jl]->{'end_group'} = 1;
2812             }
2813
2814             # Also set a line break if both lines have simple equals but with
2815             # different leading characters in patterns.  This check is similar
2816             # to one in sub check_match, and will prevent sub
2817             # prune_alignment_tree from removing alignments which otherwise
2818             # should be kept. This fix is rarely needed, but it can
2819             # occasionally improve formatting.
2820             # For example:
2821             #     my $name = $this->{Name};
2822             #     $type = $this->ctype($genlooptype) if defined $genlooptype;
2823             #     my $declini = ( $asgnonly ? ""          : "\t$type *" );
2824             #     my $cast    = ( $type     ? "($type *)" : "" );
2825             # The last two lines start with 'my' and will not match the
2826             # previous line starting with $type, so we do not want
2827             # prune_alignment tree to delete their ? : alignments at a deeper
2828             # level.
2829             my ( $i_eq_l, $tok_eq_l, $pat_eq_l ) = @{ $equals_info[$jl] };
2830             my ( $i_eq_r, $tok_eq_r, $pat_eq_r ) = @{ $equals_info[$jr] };
2831             if ( defined($i_eq_l) && defined($i_eq_r) ) {
2832
2833                 # Also, do not align equals across a change in ci level
2834                 my $ci_jump = $rnew_lines->[$jl]->{'ci_level'} !=
2835                   $rnew_lines->[$jr]->{'ci_level'};
2836
2837                 if (
2838                        $tok_eq_l eq $tok_eq_r
2839                     && $i_eq_l == 0
2840                     && $i_eq_r == 0
2841                     && ( substr( $pat_eq_l, 0, 1 ) ne substr( $pat_eq_r, 0, 1 )
2842                         || $ci_jump )
2843                   )
2844                 {
2845                     $rnew_lines->[$jl]->{'end_group'} = 1;
2846                 }
2847             }
2848         }
2849
2850         #------------------------------------------------------------
2851         # Find independent subgroups of lines.  Neighboring subgroups
2852         # do not have a common alignment token.
2853         #------------------------------------------------------------
2854         my @subgroups;
2855         push @subgroups, [ 0, $jmax ];
2856         foreach my $jl ( 0 .. $jmax - 1 ) {
2857             if ( $rnew_lines->[$jl]->{'end_group'} ) {
2858                 $subgroups[-1]->[1] = $jl;
2859                 push @subgroups, [ $jl + 1, $jmax ];
2860             }
2861         }
2862
2863         # flag to allow skipping pass 2
2864         my $saw_large_group;
2865
2866         #-----------------------------------------------------------
2867         # PASS 1 over subgroups to remove unmatched alignment tokens
2868         #-----------------------------------------------------------
2869         foreach my $item (@subgroups) {
2870             my ( $jbeg, $jend ) = @{$item};
2871
2872             my $nlines = $jend - $jbeg + 1;
2873
2874             #---------------------------------------------------
2875             # Look for complete if/elsif/else and ternary blocks
2876             #---------------------------------------------------
2877
2878             # We are looking for a common '$dividing_token' like these:
2879
2880             #    if    ( $b and $s ) { $p->{'type'} = 'a'; }
2881             #    elsif ($b)          { $p->{'type'} = 'b'; }
2882             #    elsif ($s)          { $p->{'type'} = 's'; }
2883             #    else                { $p->{'type'} = ''; }
2884             #                        ^----------- dividing_token
2885
2886             #   my $severity =
2887             #      !$routine                     ? '[PFX]'
2888             #     : $routine =~ /warn.*_d\z/     ? '[DS]'
2889             #     : $routine =~ /ck_warn/        ? 'W'
2890             #     : $routine =~ /ckWARN\d*reg_d/ ? 'S'
2891             #     : $routine =~ /ckWARN\d*reg/   ? 'W'
2892             #     : $routine =~ /vWARN\d/        ? '[WDS]'
2893             #     :                                '[PFX]';
2894             #                                    ^----------- dividing_token
2895
2896             # Only look for groups which are more than 2 lines long.  Two lines
2897             # can get messed up doing this, probably due to the various
2898             # two-line rules.
2899
2900             my $dividing_token;
2901             my %token_line_count;
2902             if ( $nlines > 2 ) {
2903
2904                 foreach my $jj ( $jbeg .. $jend ) {
2905                     my %seen;
2906                     my $line    = $rnew_lines->[$jj];
2907                     my $rtokens = $line->{'rtokens'};
2908                     foreach my $tok ( @{$rtokens} ) {
2909                         if ( !$seen{$tok} ) {
2910                             $seen{$tok}++;
2911                             $token_line_count{$tok}++;
2912                         }
2913                     }
2914                 }
2915
2916                 foreach my $tok ( keys %token_line_count ) {
2917                     if ( $token_line_count{$tok} == $nlines ) {
2918                         if (   substr( $tok, 0, 1 ) eq '?'
2919                             || substr( $tok, 0, 1 ) eq '{'
2920                             && $tok =~ /^\{\d+if/ )
2921                         {
2922                             $dividing_token = $tok;
2923                             last;
2924                         }
2925                     }
2926                 }
2927             }
2928
2929             #-------------------------------------------------------------
2930             # Loop over subgroup lines to remove unwanted alignment tokens
2931             #-------------------------------------------------------------
2932             foreach my $jj ( $jbeg .. $jend ) {
2933                 my $line    = $rnew_lines->[$jj];
2934                 my $rtokens = $line->{'rtokens'};
2935                 my $rhash   = $rline_hashes->[$jj];
2936                 my $i_eq    = $equals_info[$jj]->[0];
2937                 my @idel;
2938                 my $imax = @{$rtokens} - 2;
2939                 my $delete_above_level;
2940                 my $deleted_assignment_token;
2941
2942                 my $saw_dividing_token = EMPTY_STRING;
2943                 $saw_large_group ||= $nlines > 2 && $imax > 1;
2944
2945                 # Loop over all alignment tokens
2946                 foreach my $i ( 0 .. $imax ) {
2947                     my $tok = $rtokens->[$i];
2948                     next if ( $tok eq '#' );    # shouldn't happen
2949                     my ( $iii, $il, $ir, $raw_tok, $lev, $tag, $tok_count ) =
2950                       @{ $rhash->{$tok} };
2951
2952                     #------------------------------------------------------
2953                     # Here is the basic RULE: remove an unmatched alignment
2954                     # which does not occur in the surrounding lines.
2955                     #------------------------------------------------------
2956                     my $delete_me = !defined($il) && !defined($ir);
2957
2958                     # Apply any user controls. Note that not all lines pass
2959                     # this way so they have to be applied elsewhere too.
2960                     my $align_ok = 1;
2961                     if (%valign_control_hash) {
2962                         $align_ok = $valign_control_hash{$raw_tok};
2963                         $align_ok = $valign_control_default
2964                           unless defined($align_ok);
2965                         $delete_me ||= !$align_ok;
2966                     }
2967
2968                     # But now we modify this with exceptions...
2969
2970                     # EXCEPTION 1: If we are in a complete ternary or
2971                     # if/elsif/else group, and this token is not on every line
2972                     # of the group, should we delete it to preserve overall
2973                     # alignment?
2974                     if ($dividing_token) {
2975                         if ( $token_line_count{$tok} >= $nlines ) {
2976                             $saw_dividing_token ||= $tok eq $dividing_token;
2977                         }
2978                         else {
2979
2980                             # For shorter runs, delete toks to save alignment.
2981                             # For longer runs, keep toks after the '{' or '?'
2982                             # to allow sub-alignments within braces.  The
2983                             # number 5 lines is arbitrary but seems to work ok.
2984                             $delete_me ||=
2985                               ( $nlines < 5 || !$saw_dividing_token );
2986                         }
2987                     }
2988
2989                     # EXCEPTION 2: Remove all tokens above a certain level
2990                     # following a previous deletion.  For example, we have to
2991                     # remove tagged higher level alignment tokens following a
2992                     # '=>' deletion because the tags of higher level tokens
2993                     # will now be incorrect. For example, this will prevent
2994                     # aligning commas as follows after deleting the second '=>'
2995                     #    $w->insert(
2996                     #         ListBox => origin => [ 270, 160 ],
2997                     #         size    => [ 200,           55 ],
2998                     #    );
2999                     if ( defined($delete_above_level) ) {
3000                         if ( $lev > $delete_above_level ) {
3001                             $delete_me ||= 1;
3002                         }
3003                         else { $delete_above_level = undef }
3004                     }
3005
3006                     # EXCEPTION 3: Remove all but certain tokens after an
3007                     # assignment deletion.
3008                     if (
3009                         $deleted_assignment_token
3010                         && ( $lev > $group_level
3011                             || !$keep_after_deleted_assignment{$raw_tok} )
3012                       )
3013                     {
3014                         $delete_me ||= 1;
3015                     }
3016
3017                     # EXCEPTION 4: Do not touch the first line of a 2 line
3018                     # terminal match, such as below, because j_terminal has
3019                     # already been set.
3020                     #    if ($tag) { $tago = "<$tag>"; $tagc = "</$tag>"; }
3021                     #    else      { $tago = $tagc = ''; }
3022                     # But see snippets 'else1.t' and 'else2.t'
3023                     $delete_me = 0
3024                       if ( $jj == $jbeg
3025                         && $has_terminal_match
3026                         && $nlines == 2 );
3027
3028                     # EXCEPTION 5: misc additional rules for commas and equals
3029                     if ( $delete_me && $tok_count == 1 ) {
3030
3031                         # okay to delete second and higher copies of a token
3032
3033                         # for a comma...
3034                         if ( $raw_tok eq ',' ) {
3035
3036                             # Do not delete commas before an equals
3037                             $delete_me = 0
3038                               if ( defined($i_eq) && $i < $i_eq );
3039
3040                             # Do not delete line-level commas
3041                             $delete_me = 0 if ( $lev <= $group_level );
3042                         }
3043
3044                         # For an assignment at group level..
3045                         if (   $is_assignment{$raw_tok}
3046                             && $lev == $group_level )
3047                         {
3048
3049                             # Do not delete if it is the last alignment of
3050                             # multiple tokens; this will prevent some
3051                             # undesirable alignments
3052                             if ( $imax > 0 && $i == $imax ) {
3053                                 $delete_me = 0;
3054                             }
3055
3056                             # Otherwise, set a flag to delete most
3057                             # remaining tokens
3058                             else { $deleted_assignment_token = $raw_tok }
3059                         }
3060                     }
3061
3062                     # Do not let a user exclusion be reactivated by above rules
3063                     $delete_me ||= !$align_ok;
3064
3065                     #------------------------------------
3066                     # Add this token to the deletion list
3067                     #------------------------------------
3068                     if ($delete_me) {
3069                         push @idel, $i;
3070
3071                         # update deletion propagation flags
3072                         if ( !defined($delete_above_level)
3073                             || $lev < $delete_above_level )
3074                         {
3075
3076                             # delete all following higher level alignments
3077                             $delete_above_level = $lev;
3078
3079                             # but keep deleting after => to next lower level
3080                             # to avoid some bizarre alignments
3081                             if ( $raw_tok eq '=>' ) {
3082                                 $delete_above_level = $lev - 1;
3083                             }
3084                         }
3085                     }
3086                 }    # End loop over alignment tokens
3087
3088                 # Process all deletion requests for this line
3089                 if (@idel) {
3090                     delete_selected_tokens( $line, \@idel );
3091                 }
3092             }    # End loopover lines
3093         }    # End loop over subgroups
3094
3095         # End PASS 1
3096
3097         #----------------------------------------------------------------
3098         # PASS 2: Construct a tree of matched lines and delete some small
3099         # deeper levels of tokens.  They also block good alignments.
3100         #----------------------------------------------------------------
3101         prune_alignment_tree($rnew_lines) if ($max_lev_diff);
3102
3103         #--------------------------------------------
3104         # PASS 3: compare all lines for common tokens
3105         #--------------------------------------------
3106         match_line_pairs( $rlines, $rnew_lines, \@subgroups, $group_level );
3107
3108         return ( $max_lev_diff, $saw_side_comment );
3109     }
3110 }
3111
3112 sub match_line_pairs {
3113     my ( $rlines, $rnew_lines, $rsubgroups, $group_level ) = @_;
3114
3115     # Compare each pair of lines and save information about common matches
3116     # $rlines     = list of lines including hanging side comments
3117     # $rnew_lines = list of lines without any hanging side comments
3118     # $rsubgroups = list of subgroups of the new lines
3119
3120     # TODO:
3121     # Maybe change: imax_pair => pair_match_info = ref to array
3122     #  = [$imax_align, $rMsg, ... ]
3123     #  This may eventually have multi-level match info
3124
3125     # Previous line vars
3126     my ( $line_m, $rtokens_m, $rpatterns_m, $rfield_lengths_m, $imax_m,
3127         $list_type_m, $ci_level_m );
3128
3129     # Current line vars
3130     my ( $line, $rtokens, $rpatterns, $rfield_lengths, $imax, $list_type,
3131         $ci_level );
3132
3133     use constant EXPLAIN_COMPARE_PATTERNS => 0;
3134
3135     my $compare_patterns = sub {
3136
3137         # helper routine to decide if patterns match well enough..
3138         # return code:
3139         #   0 = patterns match, continue
3140         #   1 = no match
3141         #   2 = no match, and lines do not match at all
3142
3143         my ( $tok, $tok_m, $pat, $pat_m, $pad ) = @_;
3144         my $GoToMsg     = EMPTY_STRING;
3145         my $return_code = 0;
3146
3147         my ( $alignment_token, $lev, $tag, $tok_count ) =
3148           decode_alignment_token($tok);
3149
3150         # We have to be very careful about aligning commas
3151         # when the pattern's don't match, because it can be
3152         # worse to create an alignment where none is needed
3153         # than to omit one.  Here's an example where the ','s
3154         # are not in named containers.  The first line below
3155         # should not match the next two:
3156         #   ( $a, $b ) = ( $b, $r );
3157         #   ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
3158         #   ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
3159         if ( $alignment_token eq ',' ) {
3160
3161             # do not align commas unless they are in named
3162             # containers
3163             if ( $tok !~ /[A-Za-z]/ ) {
3164                 $return_code = 1;
3165                 $GoToMsg     = "do not align commas in unnamed containers";
3166             }
3167             else {
3168                 $return_code = 0;
3169             }
3170         }
3171
3172         # do not align parens unless patterns match;
3173         # large ugly spaces can occur in math expressions.
3174         elsif ( $alignment_token eq '(' ) {
3175
3176             # But we can allow a match if the parens don't
3177             # require any padding.
3178             if ( $pad != 0 ) {
3179                 $return_code = 1;
3180                 $GoToMsg = "do not align '(' unless patterns match or pad=0";
3181             }
3182             else {
3183                 $return_code = 0;
3184             }
3185         }
3186
3187         # Handle an '=' alignment with different patterns to
3188         # the left.
3189         elsif ( $alignment_token eq '=' ) {
3190
3191             # It is best to be a little restrictive when
3192             # aligning '=' tokens.  Here is an example of
3193             # two lines that we will not align:
3194             #       my $variable=6;
3195             #       $bb=4;
3196             # The problem is that one is a 'my' declaration,
3197             # and the other isn't, so they're not very similar.
3198             # We will filter these out by comparing the first
3199             # letter of the pattern.  This is crude, but works
3200             # well enough.
3201             if ( substr( $pat_m, 0, 1 ) ne substr( $pat, 0, 1 ) ) {
3202                 $GoToMsg     = "first character before equals differ";
3203                 $return_code = 1;
3204             }
3205
3206             # The introduction of sub 'prune_alignment_tree'
3207             # enabled alignment of lists left of the equals with
3208             # other scalar variables. For example:
3209             # my ( $D, $s, $e ) = @_;
3210             # my $d             = length $D;
3211             # my $c             = $e - $s - $d;
3212
3213             # But this would change formatting of a lot of scripts,
3214             # so for now we prevent alignment of comma lists on the
3215             # left with scalars on the left.  We will also prevent
3216             # any partial alignments.
3217
3218             # set return code 2 if the = is at line level, but
3219             # set return code 1 if the = is below line level, i.e.
3220             #  sub new { my ( $p, $v ) = @_; bless \$v, $p }
3221             #  sub iter { my ($x) = @_; return undef if $$x < 0; return $$x--; }
3222
3223             elsif (
3224                 ( index( $pat_m, ',' ) >= 0 ) ne ( index( $pat, ',' ) >= 0 ) )
3225             {
3226                 $GoToMsg     = "mixed commas/no-commas before equals";
3227                 $return_code = 1;
3228                 if ( $lev eq $group_level ) {
3229                     $return_code = 2;
3230                 }
3231             }
3232             else {
3233                 $return_code = 0;
3234             }
3235         }
3236         else {
3237             $return_code = 0;
3238         }
3239
3240         EXPLAIN_COMPARE_PATTERNS
3241           && $return_code
3242           && print STDERR "no match because $GoToMsg\n";
3243
3244         return ( $return_code, \$GoToMsg );
3245
3246     };    ## end of $compare_patterns->()
3247
3248     # loop over subgroups
3249     foreach my $item ( @{$rsubgroups} ) {
3250         my ( $jbeg, $jend ) = @{$item};
3251         my $nlines = $jend - $jbeg + 1;
3252         next unless ( $nlines > 1 );
3253
3254         # loop over lines in a subgroup
3255         foreach my $jj ( $jbeg .. $jend ) {
3256
3257             $line_m           = $line;
3258             $rtokens_m        = $rtokens;
3259             $rpatterns_m      = $rpatterns;
3260             $rfield_lengths_m = $rfield_lengths;
3261             $imax_m           = $imax;
3262             $list_type_m      = $list_type;
3263             $ci_level_m       = $ci_level;
3264
3265             $line           = $rnew_lines->[$jj];
3266             $rtokens        = $line->{'rtokens'};
3267             $rpatterns      = $line->{'rpatterns'};
3268             $rfield_lengths = $line->{'rfield_lengths'};
3269             $imax           = @{$rtokens} - 2;
3270             $list_type      = $line->{'list_type'};
3271             $ci_level       = $line->{'ci_level'};
3272
3273             # nothing to do for first line
3274             next if ( $jj == $jbeg );
3275
3276             my $ci_jump = $ci_level - $ci_level_m;
3277
3278             my $imax_min = $imax_m < $imax ? $imax_m : $imax;
3279
3280             my $imax_align = -1;
3281
3282             # find number of leading common tokens
3283
3284             #---------------------------------
3285             # No match to hanging side comment
3286             #---------------------------------
3287             if ( $line->{'is_hanging_side_comment'} ) {
3288
3289                 # Should not get here; HSC's have been filtered out
3290                 $imax_align = -1;
3291             }
3292
3293             #-----------------------------
3294             # Handle comma-separated lists
3295             #-----------------------------
3296             elsif ( $list_type && $list_type eq $list_type_m ) {
3297
3298                 # do not align lists across a ci jump with new list method
3299                 if ($ci_jump) { $imax_min = -1 }
3300
3301                 my $i_nomatch = $imax_min + 1;
3302                 foreach my $i ( 0 .. $imax_min ) {
3303                     my $tok   = $rtokens->[$i];
3304                     my $tok_m = $rtokens_m->[$i];
3305                     if ( $tok ne $tok_m ) {
3306                         $i_nomatch = $i;
3307                         last;
3308                     }
3309                 }
3310
3311                 $imax_align = $i_nomatch - 1;
3312             }
3313
3314             #-----------------
3315             # Handle non-lists
3316             #-----------------
3317             else {
3318                 my $i_nomatch = $imax_min + 1;
3319                 foreach my $i ( 0 .. $imax_min ) {
3320                     my $tok   = $rtokens->[$i];
3321                     my $tok_m = $rtokens_m->[$i];
3322                     if ( $tok ne $tok_m ) {
3323                         $i_nomatch = $i;
3324                         last;
3325                     }
3326
3327                     my $pat   = $rpatterns->[$i];
3328                     my $pat_m = $rpatterns_m->[$i];
3329
3330                     # If patterns don't match, we have to be careful...
3331                     if ( $pat_m ne $pat ) {
3332                         my $pad =
3333                           $rfield_lengths->[$i] - $rfield_lengths_m->[$i];
3334                         my ( $match_code, $rmsg ) = $compare_patterns->(
3335                             $tok, $tok_m, $pat, $pat_m, $pad
3336                         );
3337                         if ($match_code) {
3338                             if    ( $match_code == 1 ) { $i_nomatch = $i }
3339                             elsif ( $match_code == 2 ) { $i_nomatch = 0 }
3340                             last;
3341                         }
3342                     }
3343                 }
3344                 $imax_align = $i_nomatch - 1;
3345             }
3346
3347             $line_m->{'imax_pair'} = $imax_align;
3348
3349         } ## end loop over lines
3350
3351         # Put fence at end of subgroup
3352         $line->{'imax_pair'} = -1;
3353
3354     } ## end loop over subgroups
3355
3356     # if there are hanging side comments, propagate the pair info down to them
3357     # so that lines can just look back one line for their pair info.
3358     if ( @{$rlines} > @{$rnew_lines} ) {
3359         my $last_pair_info = -1;
3360         foreach my $line ( @{$rlines} ) {
3361             if ( $line->{'is_hanging_side_comment'} ) {
3362                 $line->{'imax_pair'} = $last_pair_info;
3363             }
3364             else {
3365                 $last_pair_info = $line->{'imax_pair'};
3366             }
3367         }
3368     }
3369     return;
3370 }
3371
3372 sub fat_comma_to_comma {
3373     my ($str) = @_;
3374
3375     # We are changing '=>' to ',' and removing any trailing decimal count
3376     # because currently fat commas have a count and commas do not.
3377     # For example, we will change '=>2+{-3.2' into ',2+{-3'
3378     if ( $str =~ /^=>([^\.]*)/ ) { $str = ',' . $1 }
3379     return $str;
3380 }
3381
3382 sub get_line_token_info {
3383
3384     # scan lines of tokens and return summary information about the range of
3385     # levels and patterns.
3386     my ($rlines) = @_;
3387
3388     # First scan to check monotonicity. Here is an example of several
3389     # lines which are monotonic. The = is the lowest level, and
3390     # the commas are all one level deeper. So this is not nonmonotonic.
3391     #  $$d{"weeks"}   = [ "w",  "wk",  "wks", "week", "weeks" ];
3392     #  $$d{"days"}    = [ "d",  "day", "days" ];
3393     #  $$d{"hours"}   = [ "h",  "hr",  "hrs", "hour", "hours" ];
3394     my @all_token_info;
3395     my $all_monotonic = 1;
3396     foreach my $jj ( 0 .. @{$rlines} - 1 ) {
3397         my ($line) = $rlines->[$jj];
3398         my $rtokens = $line->{'rtokens'};
3399         my $last_lev;
3400         my $is_monotonic = 1;
3401         my $i            = -1;
3402         foreach my $tok ( @{$rtokens} ) {
3403             $i++;
3404             my ( $raw_tok, $lev, $tag, $tok_count ) =
3405               decode_alignment_token($tok);
3406             push @{ $all_token_info[$jj] },
3407               [ $raw_tok, $lev, $tag, $tok_count ];
3408             last if ( $tok eq '#' );
3409             if ( $i > 0 && $lev < $last_lev ) { $is_monotonic = 0 }
3410             $last_lev = $lev;
3411         }
3412         if ( !$is_monotonic ) { $all_monotonic = 0 }
3413     }
3414
3415     my $rline_values = [];
3416     foreach my $jj ( 0 .. @{$rlines} - 1 ) {
3417         my ($line) = $rlines->[$jj];
3418
3419         my $rtokens = $line->{'rtokens'};
3420         my $i       = -1;
3421         my ( $lev_min, $lev_max );
3422         my $token_pattern_max = EMPTY_STRING;
3423         my %saw_level;
3424         my $is_monotonic = 1;
3425
3426         # find the index of the last token before the side comment
3427         my $imax      = @{$rtokens} - 2;
3428         my $imax_true = $imax;
3429
3430         # If the entire group is monotonic, and the line ends in a comma list,
3431         # walk it back to the first such comma. this will have the effect of
3432         # making all trailing ragged comma lists match in the prune tree
3433         # routine.  these trailing comma lists can better be handled by later
3434         # alignment rules.
3435
3436         # Treat fat commas the same as commas here by converting them to
3437         # commas.  This will improve the chance of aligning the leading parts
3438         # of ragged lists.
3439
3440         my $tok_end = fat_comma_to_comma( $rtokens->[$imax] );
3441         if ( $all_monotonic && $tok_end =~ /^,/ ) {
3442             my $ii = $imax - 1;
3443             while ( $ii >= 0
3444                 && fat_comma_to_comma( $rtokens->[$ii] ) eq $tok_end )
3445             {
3446                 $imax = $ii;
3447                 $ii--;
3448             }
3449         }
3450
3451         # make a first pass to find level range
3452         my $last_lev;
3453         foreach my $tok ( @{$rtokens} ) {
3454             $i++;
3455             last if ( $i > $imax );
3456             last if ( $tok eq '#' );
3457             my ( $raw_tok, $lev, $tag, $tok_count ) =
3458               @{ $all_token_info[$jj]->[$i] };
3459
3460             last if ( $tok eq '#' );
3461             $token_pattern_max .= $tok;
3462             $saw_level{$lev}++;
3463             if ( !defined($lev_min) ) {
3464                 $lev_min = $lev;
3465                 $lev_max = $lev;
3466             }
3467             else {
3468                 if ( $lev < $lev_min )  { $lev_min      = $lev; }
3469                 if ( $lev > $lev_max )  { $lev_max      = $lev; }
3470                 if ( $lev < $last_lev ) { $is_monotonic = 0 }
3471             }
3472             $last_lev = $lev;
3473         }
3474
3475         # handle no levels
3476         my $rtoken_patterns = {};
3477         my $rtoken_indexes  = {};
3478         my @levs            = sort keys %saw_level;
3479         if ( !defined($lev_min) ) {
3480             $lev_min                     = -1;
3481             $lev_max                     = -1;
3482             $levs[0]                     = -1;
3483             $rtoken_patterns->{$lev_min} = EMPTY_STRING;
3484             $rtoken_indexes->{$lev_min}  = [];
3485         }
3486
3487         # handle one level
3488         elsif ( $lev_max == $lev_min ) {
3489             $rtoken_patterns->{$lev_max} = $token_pattern_max;
3490             $rtoken_indexes->{$lev_max}  = [ ( 0 .. $imax ) ];
3491         }
3492
3493         # handle multiple levels
3494         else {
3495             $rtoken_patterns->{$lev_max} = $token_pattern_max;
3496             $rtoken_indexes->{$lev_max}  = [ ( 0 .. $imax ) ];
3497
3498             my $debug   = 0;
3499             my $lev_top = pop @levs;    # alread did max level
3500             my $itok    = -1;
3501             foreach my $tok ( @{$rtokens} ) {
3502                 $itok++;
3503                 last if ( $itok > $imax );
3504                 my ( $raw_tok, $lev, $tag, $tok_count ) =
3505                   @{ $all_token_info[$jj]->[$itok] };
3506                 last if ( $raw_tok eq '#' );
3507                 foreach my $lev_test (@levs) {
3508                     next if ( $lev > $lev_test );
3509                     $rtoken_patterns->{$lev_test} .= $tok;
3510                     push @{ $rtoken_indexes->{$lev_test} }, $itok;
3511                 }
3512             }
3513             push @levs, $lev_top;
3514         }
3515
3516         push @{$rline_values},
3517           [
3518             $lev_min,        $lev_max,      $rtoken_patterns, \@levs,
3519             $rtoken_indexes, $is_monotonic, $imax_true,       $imax,
3520           ];
3521
3522         # debug
3523         0 && do {
3524             local $LIST_SEPARATOR = ')(';
3525             print "lev_min=$lev_min, lev_max=$lev_max, levels=(@levs)\n";
3526             foreach my $key ( sort keys %{$rtoken_patterns} ) {
3527                 print "$key => $rtoken_patterns->{$key}\n";
3528                 print "$key => @{$rtoken_indexes->{$key}}\n";
3529             }
3530         };
3531     } ## end loop over lines
3532     return ( $rline_values, $all_monotonic );
3533 }
3534
3535 sub prune_alignment_tree {
3536     my ($rlines) = @_;
3537     my $jmax = @{$rlines} - 1;
3538     return unless $jmax > 0;
3539
3540     # Vertical alignment in perltidy is done as an iterative process.  The
3541     # starting point is to mark all possible alignment tokens ('=', ',', '=>',
3542     # etc) for vertical alignment.  Then we have to delete all alignments
3543     # which, if actually made, would detract from overall alignment.  This
3544     # is done in several phases of which this is one.
3545
3546     # In this routine we look at the alignments of a group of lines as a
3547     # hierarchical tree.  We will 'prune' the tree to limited depths if that
3548     # will improve overall alignment at the lower depths.
3549     # For each line we will be looking at its alignment patterns down to
3550     # different fixed depths. For each depth, we include all lower depths and
3551     # ignore all higher depths.  We want to see if we can get alignment of a
3552     # larger group of lines if we ignore alignments at some lower depth.
3553     # Here is an # example:
3554
3555     # for (
3556     #     [ '$var',     sub { join $_, "bar" },            0, "bar" ],
3557     #     [ 'CONSTANT', sub { join "foo", "bar" },         0, "bar" ],
3558     #     [ 'CONSTANT', sub { join "foo", "bar", 3 },      1, "barfoo3" ],
3559     #     [ '$myvar',   sub { my $var; join $var, "bar" }, 0, "bar" ],
3560     # );
3561
3562     # In the above example, all lines have three commas at the lowest depth
3563     # (zero), so if there were no other alignments, these lines would all
3564     # align considering only the zero depth alignment token.  But some lines
3565     # have additional comma alignments at the next depth, so we need to decide
3566     # if we should drop those to keep the top level alignments, or keep those
3567     # for some additional low level alignments at the expense losing some top
3568     # level alignments.  In this case we will drop the deeper level commas to
3569     # keep the entire collection aligned.  But in some cases the decision could
3570     # go the other way.
3571
3572     # The tree for this example at the zero depth has one node containing
3573     # all four lines, since they are identical at zero level (three commas).
3574     # At depth one, there are three 'children' nodes, namely:
3575     # - lines 1 and 2, which have a single comma in the 'sub' at depth 1
3576     # - line 3, which has 2 commas at depth 1
3577     # - line4, which has a ';' and a ',' at depth 1
3578     # There are no deeper alignments in this example.
3579     # so the tree structure for this example is:
3580     #
3581     #    depth 0         depth 1      depth 2
3582     #    [lines 1-4] --  [line 1-2] -  (empty)
3583     #                 |  [line 3]   -  (empty)
3584     #                 |  [line 4]   -  (empty)
3585
3586     # We can carry this to any depth, but it is not really useful to go below
3587     # depth 2. To cleanly stop there, we will consider depth 2 to contain all
3588     # alignments at depth >=2.
3589
3590     use constant EXPLAIN_PRUNE => 0;
3591
3592     #-------------------------------------------------------------------
3593     # Prune Tree Step 1. Start by scanning the lines and collecting info
3594     #-------------------------------------------------------------------
3595
3596     # Note that the caller had this info but we have to redo this now because
3597     # alignment tokens may have been deleted.
3598     my ( $rline_values, $all_monotonic ) = get_line_token_info($rlines);
3599
3600     # If all the lines have levels which increase monotonically from left to
3601     # right, then the sweep-left-to-right pass can do a better job of alignment
3602     # than pruning, and without deleting alignments.
3603     return if ($all_monotonic);
3604
3605     # Contents of $rline_values
3606     #   [
3607     #     $lev_min,        $lev_max,      $rtoken_patterns, \@levs,
3608     #     $rtoken_indexes, $is_monotonic, $imax_true,       $imax,
3609     #   ];
3610
3611     # We can work to any depth, but there is little advantage to working
3612     # to a a depth greater than 2
3613     my $MAX_DEPTH = 2;
3614
3615     # This arrays will hold the tree of alignment tokens at different depths
3616     # for these lines.
3617     my @match_tree;
3618
3619     # Tree nodes contain these values:
3620     # $match_tree[$depth] = [$jbeg, $jend, $n_parent, $level, $pattern,
3621     #                        $nc_beg_p, $nc_end_p, $rindexes];
3622     # where
3623     #      $depth = 0,1,2 = index of depth of the match
3624
3625     #  $jbeg beginning index j of the range of lines in this match
3626     #  $jend ending index j of the range of lines in this match
3627     #  $n_parent = index of the containing group at $depth-1, if it exists
3628     #  $level = actual level of code being matched in this group
3629     #  $pattern = alignment pattern being matched
3630     #  $nc_beg_p = first child
3631     #  $nc_end_p = last child
3632     #  $rindexes = ref to token indexes
3633
3634     # the patterns and levels of the current group being formed at each depth
3635     my ( @token_patterns_current, @levels_current, @token_indexes_current );
3636
3637     # the patterns and levels of the next line being tested at each depth
3638     my ( @token_patterns_next, @levels_next, @token_indexes_next );
3639
3640     #-----------------------------------------------------------
3641     # define a recursive worker subroutine for tree construction
3642     #-----------------------------------------------------------
3643
3644     # This is a recursive routine which is called if a match condition changes
3645     # at any depth when a new line is encountered.  It ends the match node
3646     # which changed plus all deeper nodes attached to it.
3647     my $end_node;
3648     $end_node = sub {
3649         my ( $depth, $jl, $n_parent ) = @_;
3650
3651         # $depth is the tree depth
3652         # $jl is the  index of the line
3653         # $n_parent is index of the parent node of this node
3654
3655         return if ( $depth > $MAX_DEPTH );
3656
3657         # end any current group at this depth
3658         if (   $jl >= 0
3659             && defined( $match_tree[$depth] )
3660             && @{ $match_tree[$depth] }
3661             && defined( $levels_current[$depth] ) )
3662         {
3663             $match_tree[$depth]->[-1]->[1] = $jl;
3664         }
3665
3666         # Define the index of the node we will create below
3667         my $ng_self = 0;
3668         if ( defined( $match_tree[$depth] ) ) {
3669             $ng_self = @{ $match_tree[$depth] };
3670         }
3671
3672         # end any next deeper child node(s)
3673         $end_node->( $depth + 1, $jl, $ng_self );
3674
3675         # update the levels being matched
3676         $token_patterns_current[$depth] = $token_patterns_next[$depth];
3677         $token_indexes_current[$depth]  = $token_indexes_next[$depth];
3678         $levels_current[$depth]         = $levels_next[$depth];
3679
3680         # Do not start a new group at this level if it is not being used
3681         if ( !defined( $levels_next[$depth] )
3682             || $depth > 0
3683             && $levels_next[$depth] <= $levels_next[ $depth - 1 ] )
3684         {
3685             return;
3686         }
3687
3688         # Create a node for the next group at this depth. We initially assume
3689         # that it will continue to $jmax, and correct that later if the node
3690         # ends earlier.
3691         push @{ $match_tree[$depth] },
3692           [
3693             $jl + 1, $jmax, $n_parent, $levels_current[$depth],
3694             $token_patterns_current[$depth],
3695             undef, undef, $token_indexes_current[$depth],
3696           ];
3697
3698         return;
3699     };    ## end sub end_node
3700
3701     #-----------------------------------------------------
3702     # Prune Tree Step 2. Loop to form the tree of matches.
3703     #-----------------------------------------------------
3704     foreach my $jp ( 0 .. $jmax ) {
3705
3706         # working with two adjacent line indexes, 'm'=minus, 'p'=plus
3707         my $jm = $jp - 1;
3708
3709         # Pull out needed values for the next line
3710         my ( $lev_min, $lev_max, $rtoken_patterns, $rlevs, $rtoken_indexes,
3711             $is_monotonic, $imax_true, $imax )
3712           = @{ $rline_values->[$jp] };
3713
3714         # Transfer levels and patterns for this line to the working arrays.
3715         # If the number of levels differs from our chosen MAX_DEPTH ...
3716         # if fewer than MAX_DEPTH: leave levels at missing depths undefined
3717         # if more than MAX_DEPTH: set the MAX_DEPTH level to be the maximum
3718         @levels_next = @{$rlevs}[ 0 .. $MAX_DEPTH ];
3719         if ( @{$rlevs} > $MAX_DEPTH ) {
3720             $levels_next[$MAX_DEPTH] = $rlevs->[-1];
3721         }
3722         my $depth = 0;
3723         foreach my $item (@levels_next) {
3724             $token_patterns_next[$depth] =
3725               defined($item) ? $rtoken_patterns->{$item} : undef;
3726             $token_indexes_next[$depth] =
3727               defined($item) ? $rtoken_indexes->{$item} : undef;
3728             $depth++;
3729         }
3730
3731         # Look for a change in match groups...
3732
3733         # Initialize on the first line
3734         if ( $jp == 0 ) {
3735             my $n_parent;
3736             $end_node->( 0, $jm, $n_parent );
3737         }
3738
3739         # End groups if a hard flag has been set
3740         elsif ( $rlines->[$jm]->{'end_group'} ) {
3741             my $n_parent;
3742             $end_node->( 0, $jm, $n_parent );
3743         }
3744
3745         # Continue at hanging side comment
3746         elsif ( $rlines->[$jp]->{'is_hanging_side_comment'} ) {
3747             next;
3748         }
3749
3750         # Otherwise see if anything changed and update the tree if so
3751         else {
3752             foreach my $depth ( 0 .. $MAX_DEPTH ) {
3753
3754                 my $def_current = defined( $token_patterns_current[$depth] );
3755                 my $def_next    = defined( $token_patterns_next[$depth] );
3756                 last unless ( $def_current || $def_next );
3757                 if (   !$def_current
3758                     || !$def_next
3759                     || $token_patterns_current[$depth] ne
3760                     $token_patterns_next[$depth] )
3761                 {
3762                     my $n_parent;
3763                     if ( $depth > 0 && defined( $match_tree[ $depth - 1 ] ) ) {
3764                         $n_parent = @{ $match_tree[ $depth - 1 ] } - 1;
3765                     }
3766                     $end_node->( $depth, $jm, $n_parent );
3767                     last;
3768                 }
3769             }
3770         }
3771     } ## end loop to form tree of matches
3772
3773     #---------------------------------------------------------
3774     # Prune Tree Step 3. Make links from parent to child nodes
3775     #---------------------------------------------------------
3776
3777     # It seemed cleaner to do this as a separate step rather than during tree
3778     # construction.  The children nodes have links up to the parent node which
3779     # created them.  Now make links in the opposite direction, so the parents
3780     # can find the children.  We store the range of children nodes ($nc_beg,
3781     # $nc_end) of each parent with two additional indexes in the original array.
3782     # These will be undef if no children.
3783     foreach my $depth ( reverse( 1 .. $MAX_DEPTH ) ) {
3784         next unless defined( $match_tree[$depth] );
3785         my $nc_max = @{ $match_tree[$depth] } - 1;
3786         my $np_now;
3787         foreach my $nc ( 0 .. $nc_max ) {
3788             my $np = $match_tree[$depth]->[$nc]->[2];
3789             if ( !defined($np) ) {
3790
3791                 # shouldn't happen
3792                 #print STDERR "lost child $np at depth $depth\n";
3793                 next;
3794             }
3795             if ( !defined($np_now) || $np != $np_now ) {
3796                 $np_now = $np;
3797                 $match_tree[ $depth - 1 ]->[$np]->[5] = $nc;
3798             }
3799             $match_tree[ $depth - 1 ]->[$np]->[6] = $nc;
3800         }
3801     } ## end loop to make links down to the child nodes
3802
3803     EXPLAIN_PRUNE > 0 && do {
3804         print "Tree complete. Found these groups:\n";
3805         foreach my $depth ( 0 .. $MAX_DEPTH ) {
3806             Dump_tree_groups( \@{ $match_tree[$depth] }, "depth=$depth" );
3807         }
3808     };
3809
3810     #------------------------------------------------------
3811     # Prune Tree Step 4. Make a list of nodes to be deleted
3812     #------------------------------------------------------
3813
3814     #  list of lines with tokens to be deleted:
3815     #  [$jbeg, $jend, $level_keep]
3816     #  $jbeg..$jend is the range of line indexes,
3817     #  $level_keep is the minimum level to keep
3818     my @delete_list;
3819
3820     # Not currently used:
3821     #  Groups with ending comma lists and their range of sizes:
3822     #  $ragged_comma_group{$id} = [ imax_group_min, imax_group_max ]
3823     ## my %ragged_comma_group;
3824
3825     # Define a threshold line count for forcing a break
3826     my $nlines_break = 3;
3827
3828     # We work with a list of nodes to visit at the next deeper depth.
3829     my @todo_list;
3830     if ( defined( $match_tree[0] ) ) {
3831         @todo_list = ( 0 .. @{ $match_tree[0] } - 1 );
3832     }
3833
3834     foreach my $depth ( 0 .. $MAX_DEPTH ) {
3835         last unless (@todo_list);
3836         my @todo_next;
3837         foreach my $np (@todo_list) {
3838             my ( $jbeg_p, $jend_p, $np_p, $lev_p, $pat_p, $nc_beg_p, $nc_end_p,
3839                 $rindexes_p )
3840               = @{ $match_tree[$depth]->[$np] };
3841             my $nlines_p = $jend_p - $jbeg_p + 1;
3842
3843             # nothing to do if no children
3844             next unless defined($nc_beg_p);
3845
3846             # Define the number of lines to either keep or delete a child node.
3847             # This is the key decision we have to make.  We want to delete
3848             # short runs of matched lines, and keep long runs.  It seems easier
3849             # for the eye to follow breaks in monotonic level changes than
3850             # non-monotonic level changes.  For example, the following looks
3851             # best if we delete the lower level alignments:
3852
3853             #  [1]                  ~~ [];
3854             #  [ ["foo"], ["bar"] ] ~~ [ qr/o/, qr/a/ ];
3855             #  [ qr/o/, qr/a/ ]     ~~ [ ["foo"], ["bar"] ];
3856             #  [ "foo", "bar" ]     ~~ [ qr/o/, qr/a/ ];
3857             #  [ qr/o/, qr/a/ ]     ~~ [ "foo", "bar" ];
3858             #  $deep1               ~~ $deep1;
3859
3860             # So we will use two thresholds.
3861             my $nmin_mono     = $depth + 2;
3862             my $nmin_non_mono = $depth + 6;
3863             if ( $nmin_mono > $nlines_p - 1 ) {
3864                 $nmin_mono = $nlines_p - 1;
3865             }
3866             if ( $nmin_non_mono > $nlines_p - 1 ) {
3867                 $nmin_non_mono = $nlines_p - 1;
3868             }
3869
3870             # loop to keep or delete each child node
3871             foreach my $nc ( $nc_beg_p .. $nc_end_p ) {
3872                 my ( $jbeg_c, $jend_c, $np_c, $lev_c, $pat_c, $nc_beg_c,
3873                     $nc_end_c )
3874                   = @{ $match_tree[ $depth + 1 ]->[$nc] };
3875                 my $nlines_c     = $jend_c - $jbeg_c + 1;
3876                 my $is_monotonic = $rline_values->[$jbeg_c]->[5];
3877                 my $nmin         = $is_monotonic ? $nmin_mono : $nmin_non_mono;
3878                 if ( $nlines_c < $nmin ) {
3879 ##print "deleting child, nlines=$nlines_c, nmin=$nmin\n";
3880                     push @delete_list, [ $jbeg_c, $jend_c, $lev_p ];
3881                 }
3882                 else {
3883 ##print "keeping child, nlines=$nlines_c, nmin=$nmin\n";
3884                     push @todo_next, $nc;
3885                 }
3886             }
3887         }
3888         @todo_list = @todo_next;
3889     } ## end loop to mark nodes to delete
3890
3891     #------------------------------------------------------------
3892     # Prune Tree Step 5. Loop to delete selected alignment tokens
3893     #------------------------------------------------------------
3894     foreach my $item (@delete_list) {
3895         my ( $jbeg, $jend, $level_keep ) = @{$item};
3896         foreach my $jj ( $jbeg .. $jend ) {
3897             my $line = $rlines->[$jj];
3898             my @idel;
3899             my $rtokens = $line->{'rtokens'};
3900             my $imax    = @{$rtokens} - 2;
3901             foreach my $i ( 0 .. $imax ) {
3902                 my $tok = $rtokens->[$i];
3903                 my ( $raw_tok, $lev, $tag, $tok_count ) =
3904                   decode_alignment_token($tok);
3905                 if ( $lev > $level_keep ) {
3906                     push @idel, $i;
3907                 }
3908             }
3909             if (@idel) {
3910                 delete_selected_tokens( $line, \@idel );
3911             }
3912         }
3913     } ## end loop to delete selected alignment tokens
3914
3915     return;
3916 } ## end sub prune_alignment_tree
3917
3918 sub Dump_tree_groups {
3919     my ( $rgroup, $msg ) = @_;
3920     print "$msg\n";
3921     local $LIST_SEPARATOR = ')(';
3922     foreach my $item ( @{$rgroup} ) {
3923         my @fix = @{$item};
3924         foreach my $val (@fix) { $val = "undef" unless defined $val; }
3925         $fix[4] = "...";
3926         print "(@fix)\n";
3927     }
3928     return;
3929 }
3930
3931 {    ## closure for sub is_marginal_match
3932
3933     my %is_if_or;
3934     my %is_assignment;
3935     my %is_good_alignment;
3936
3937     # This test did not give sufficiently better results to use as an update,
3938     # but the flag is worth keeping as a starting point for future testing.
3939     use constant TEST_MARGINAL_EQ_ALIGNMENT => 0;
3940
3941     BEGIN {
3942
3943         my @q = qw(
3944           if unless or ||
3945         );
3946         @is_if_or{@q} = (1) x scalar(@q);
3947
3948         @q = qw(
3949           = **= += *= &= <<= &&=
3950           -= /= |= >>= ||= //=
3951           .= %= ^=
3952           x=
3953         );
3954         @is_assignment{@q} = (1) x scalar(@q);
3955
3956         # Vertically aligning on certain "good" tokens is usually okay
3957         # so we can be less restrictive in marginal cases.
3958         @q = qw( { ? => = );
3959         push @q, (',');
3960         @is_good_alignment{@q} = (1) x scalar(@q);
3961     }
3962
3963     sub is_marginal_match {
3964
3965         my ( $line_0, $line_1, $group_level, $imax_align, $imax_prev ) = @_;
3966
3967         # Decide if we should undo some or all of the common alignments of a
3968         # group of just two lines.
3969
3970         # Given:
3971         #   $line_0 and $line_1 - the two lines
3972         #   $group_level = the indentation level of the group being processed
3973         #   $imax_align = the maximum index of the common alignment tokens
3974         #                 of the two lines
3975         #   $imax_prev  = the maximum index of the common alignment tokens
3976         #                 with the line before $line_0 (=-1 of does not exist)
3977
3978         # Return:
3979         #   $is_marginal = true if the two lines should NOT be fully aligned
3980         #                = false if the two lines can remain fully aligned
3981         #   $imax_align  = the index of the highest alignment token shared by
3982         #                  these two lines to keep if the match is marginal.
3983
3984         # When we have an alignment group of just two lines like this, we are
3985         # working in the twilight zone of what looks good and what looks bad.
3986         # This routine is a collection of rules which work have been found to
3987         # work fairly well, but it will need to be updated from time to time.
3988
3989         my $is_marginal = 0;
3990
3991         #---------------------------------------
3992         # Always align certain special cases ...
3993         #---------------------------------------
3994         if (
3995
3996             # always keep alignments of a terminal else or ternary
3997             defined( $line_1->{'j_terminal_match'} )
3998
3999             # always align lists
4000             || $line_0->{'list_type'}
4001
4002             # always align hanging side comments
4003             || $line_1->{'is_hanging_side_comment'}
4004
4005           )
4006         {
4007             return ( $is_marginal, $imax_align );
4008         }
4009
4010         my $jmax_0           = $line_0->{'jmax'};
4011         my $jmax_1           = $line_1->{'jmax'};
4012         my $rtokens_1        = $line_1->{'rtokens'};
4013         my $rtokens_0        = $line_0->{'rtokens'};
4014         my $rfield_lengths_0 = $line_0->{'rfield_lengths'};
4015         my $rfield_lengths_1 = $line_1->{'rfield_lengths'};
4016         my $rpatterns_0      = $line_0->{'rpatterns'};
4017         my $rpatterns_1      = $line_1->{'rpatterns'};
4018         my $imax_next        = $line_1->{'imax_pair'};
4019
4020         # We will scan the alignment tokens and set a flag '$is_marginal' if
4021         # it seems that the an alignment would look bad.
4022         my $max_pad            = 0;
4023         my $saw_good_alignment = 0;
4024         my $saw_if_or;                # if we saw an 'if' or 'or' at group level
4025         my $raw_tokb = EMPTY_STRING;  # first token seen at group level
4026         my $jfirst_bad;
4027         my $line_ending_fat_comma;    # is last token just a '=>' ?
4028         my $j0_eq_pad;
4029         my $j0_max_pad = 0;
4030
4031         foreach my $j ( 0 .. $jmax_1 - 2 ) {
4032             my ( $raw_tok, $lev, $tag, $tok_count ) =
4033               decode_alignment_token( $rtokens_1->[$j] );
4034             if ( $raw_tok && $lev == $group_level ) {
4035                 if ( !$raw_tokb ) { $raw_tokb = $raw_tok }
4036                 $saw_if_or ||= $is_if_or{$raw_tok};
4037             }
4038
4039             # When the first of the two lines ends in a bare '=>' this will
4040             # probably be marginal match.  (For a bare =>, the next field length
4041             # will be 2 or 3, depending on side comment)
4042             $line_ending_fat_comma =
4043                  $j == $jmax_1 - 2
4044               && $raw_tok eq '=>'
4045               && $rfield_lengths_0->[ $j + 1 ] <= 3;
4046
4047             my $pad = $rfield_lengths_1->[$j] - $rfield_lengths_0->[$j];
4048             if ( $j == 0 ) {
4049                 $pad += $line_1->{'leading_space_count'} -
4050                   $line_0->{'leading_space_count'};
4051
4052                 # Remember the pad at a leading equals
4053                 if ( $raw_tok eq '=' && $lev == $group_level ) {
4054                     $j0_eq_pad = $pad;
4055                     $j0_max_pad =
4056                       0.5 * ( $rfield_lengths_1->[0] + $rfield_lengths_0->[0] );
4057                     $j0_max_pad = 4 if ( $j0_max_pad < 4 );
4058                 }
4059             }
4060
4061             if ( $pad < 0 )        { $pad     = -$pad }
4062             if ( $pad > $max_pad ) { $max_pad = $pad }
4063             if ( $is_good_alignment{$raw_tok} && !$line_ending_fat_comma ) {
4064                 $saw_good_alignment = 1;
4065             }
4066             else {
4067                 $jfirst_bad = $j unless defined($jfirst_bad);
4068             }
4069             if ( $rpatterns_0->[$j] ne $rpatterns_1->[$j] ) {
4070
4071                 # Flag this as a marginal match since patterns differ.
4072                 # Normally, we will not allow just two lines to match if
4073                 # marginal. But we can allow matching in some specific cases.
4074
4075                 $jfirst_bad  = $j if ( !defined($jfirst_bad) );
4076                 $is_marginal = 1  if ( $is_marginal == 0 );
4077                 if ( $raw_tok eq '=' ) {
4078
4079                     # Here is an example of a marginal match:
4080                     #       $done{$$op} = 1;
4081                     #       $op         = compile_bblock($op);
4082                     # The left tokens are both identifiers, but
4083                     # one accesses a hash and the other doesn't.
4084                     # We'll let this be a tentative match and undo
4085                     # it later if we don't find more than 2 lines
4086                     # in the group.
4087                     $is_marginal = 2;
4088                 }
4089             }
4090         }
4091
4092         $is_marginal = 1 if ( $is_marginal == 0 && $line_ending_fat_comma );
4093
4094         # Turn off the "marginal match" flag in some cases...
4095         # A "marginal match" occurs when the alignment tokens agree
4096         # but there are differences in the other tokens (patterns).
4097         # If we leave the marginal match flag set, then the rule is that we
4098         # will align only if there are more than two lines in the group.
4099         # We will turn of the flag if we almost have a match
4100         # and either we have seen a good alignment token or we
4101         # just need a small pad (2 spaces) to fit.  These rules are
4102         # the result of experimentation.  Tokens which misaligned by just
4103         # one or two characters are annoying.  On the other hand,
4104         # large gaps to less important alignment tokens are also annoying.
4105         if ( $is_marginal == 1
4106             && ( $saw_good_alignment || $max_pad < 3 ) )
4107         {
4108             $is_marginal = 0;
4109         }
4110
4111         # We will use the line endings to help decide on alignments...
4112         # See if the lines end with semicolons...
4113         my $sc_term0;
4114         my $sc_term1;
4115         if ( $jmax_0 < 1 || $jmax_1 < 1 ) {
4116
4117             # shouldn't happen
4118         }
4119         else {
4120             my $pat0 = $rpatterns_0->[ $jmax_0 - 1 ];
4121             my $pat1 = $rpatterns_1->[ $jmax_1 - 1 ];
4122             $sc_term0 = $pat0 =~ /;b?$/;
4123             $sc_term1 = $pat1 =~ /;b?$/;
4124         }
4125
4126         if ( !$is_marginal && !$sc_term0 ) {
4127
4128             # First line of assignment should be semicolon terminated.
4129             # For example, do not align here:
4130             #  $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
4131             #    $$href{-NUM_DIRS} = 0;
4132             if ( $is_assignment{$raw_tokb} ) {
4133                 $is_marginal = 1;
4134             }
4135         }
4136
4137         # Try to avoid some undesirable alignments of opening tokens
4138         # for example, the space between grep and { here:
4139         #  return map { ( $_ => $_ ) }
4140         #    grep     { /$handles/ } $self->_get_delegate_method_list;
4141         $is_marginal ||=
4142              ( $raw_tokb eq '(' || $raw_tokb eq '{' )
4143           && $jmax_1 == 2
4144           && $sc_term0 ne $sc_term1;
4145
4146         #---------------------------------------
4147         # return if this is not a marginal match
4148         #---------------------------------------
4149         if ( !$is_marginal ) {
4150             return ( $is_marginal, $imax_align );
4151         }
4152
4153         # Undo the marginal match flag in certain cases,
4154
4155         # Two lines with a leading equals-like operator are allowed to
4156         # align if the patterns to the left of the equals are the same.
4157         # For example the following two lines are a marginal match but have
4158         # the same left side patterns, so we will align the equals.
4159         #     my $orig = my $format = "^<<<<< ~~\n";
4160         #     my $abc  = "abc";
4161         # But these have a different left pattern so they will not be
4162         # aligned
4163         #     $xmldoc .= $`;
4164         #     $self->{'leftovers'} .= "<bx-seq:seq" . $';
4165
4166         # First line semicolon terminated but second not, usually ok:
4167         #               my $want = "'ab', 'a', 'b'";
4168         #               my $got  = join( ", ",
4169         #                    map { defined($_) ? "'$_'" : "undef" }
4170         #                          @got );
4171         #  First line not semicolon terminated, Not OK to match:
4172         #   $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
4173         #      $$href{-NUM_DIRS} = 0;
4174         my $pat0 = $rpatterns_0->[0];
4175         my $pat1 = $rpatterns_1->[0];
4176
4177         #---------------------------------------------------------
4178         # Turn off the marginal flag for some types of assignments
4179         #---------------------------------------------------------
4180         if ( $is_assignment{$raw_tokb} ) {
4181
4182             # undo marginal flag if first line is semicolon terminated
4183             # and leading patters match
4184             if ($sc_term0) {    # && $sc_term1) {
4185                 $is_marginal = $pat0 ne $pat1;
4186             }
4187         }
4188         elsif ( $raw_tokb eq '=>' ) {
4189
4190             # undo marginal flag if patterns match
4191             $is_marginal = $pat0 ne $pat1 || $line_ending_fat_comma;
4192         }
4193         elsif ( $raw_tokb eq '=~' ) {
4194
4195             # undo marginal flag if both lines are semicolon terminated
4196             # and leading patters match
4197             if ( $sc_term1 && $sc_term0 ) {
4198                 $is_marginal = $pat0 ne $pat1;
4199             }
4200         }
4201
4202         #-----------------------------------------------------
4203         # Turn off the marginal flag if we saw an 'if' or 'or'
4204         #-----------------------------------------------------
4205
4206         # A trailing 'if' and 'or' often gives a good alignment
4207         # For example, we can align these:
4208         #  return -1     if $_[0] =~ m/^CHAPT|APPENDIX/;
4209         #  return $1 + 0 if $_[0] =~ m/^SECT(\d*)$/;
4210
4211         # or
4212         #  $d_in_m[2] = 29          if ( &Date_LeapYear($y) );
4213         #  $d         = $d_in_m[$m] if ( $d > $d_in_m[$m] );
4214
4215         if ($saw_if_or) {
4216
4217             # undo marginal flag if both lines are semicolon terminated
4218             if ( $sc_term0 && $sc_term1 ) {
4219                 $is_marginal = 0;
4220             }
4221         }
4222
4223         # For a marginal match, only keep matches before the first 'bad' match
4224         if (   $is_marginal
4225             && defined($jfirst_bad)
4226             && $imax_align > $jfirst_bad - 1 )
4227         {
4228             $imax_align = $jfirst_bad - 1;
4229         }
4230
4231         #----------------------------------------------------------
4232         # Allow sweep to match lines with leading '=' in some cases
4233         #----------------------------------------------------------
4234         if ( $imax_align < 0 && defined($j0_eq_pad) ) {
4235
4236             if (
4237
4238                 # If there is a following line with leading equals, or
4239                 # preceding line with leading equals, then let the sweep align
4240                 # them without restriction.  For example, the first two lines
4241                 # here are a marginal match, but they are followed by a line
4242                 # with leading equals, so the sweep-lr logic can align all of
4243                 # the lines:
4244
4245                 #  $date[1] = $month_to_num{ $date[1] };            # <--line_0
4246                 #  @xdate   = split( /[:\/\s]/, $log->field('t') ); # <--line_1
4247                 #  $day     = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
4248                 #  $time    = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
4249
4250                 # Likewise, if we reverse the two pairs we want the same result
4251
4252                 #  $day     = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
4253                 #  $time    = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
4254                 #  $date[1] = $month_to_num{ $date[1] };            # <--line_0
4255                 #  @xdate   = split( /[:\/\s]/, $log->field('t') ); # <--line_1
4256
4257                 (
4258                        $imax_next >= 0
4259                     || $imax_prev >= 0
4260                     || TEST_MARGINAL_EQ_ALIGNMENT
4261                 )
4262                 && $j0_eq_pad >= -$j0_max_pad
4263                 && $j0_eq_pad <= $j0_max_pad
4264               )
4265             {
4266
4267                 # But do not do this if there is a comma before the '='.
4268                 # For example, the first two lines below have commas and
4269                 # therefore are not allowed to align with lines 3 & 4:
4270
4271                 # my ( $x, $y ) = $self->Size();                      #<--line_0
4272                 # my ( $left, $top, $right, $bottom ) = $self->Window(); #<--l_1
4273                 # my $vx = $right - $left;
4274                 # my $vy = $bottom - $top;
4275
4276                 if ( $rpatterns_0->[0] !~ /,/ && $rpatterns_1->[0] !~ /,/ ) {
4277                     $imax_align = 0;
4278                 }
4279             }
4280         }
4281
4282         return ( $is_marginal, $imax_align );
4283     }
4284 } ## end closure for sub is_marginal_match
4285
4286 sub get_extra_leading_spaces {
4287
4288     my ( $rlines, $rgroups ) = @_;
4289
4290     #----------------------------------------------------------
4291     # Define any extra indentation space (for the -lp option).
4292     # Here is why:
4293     # If a list has side comments, sub scan_list must dump the
4294     # list before it sees everything.  When this happens, it sets
4295     # the indentation to the standard scheme, but notes how
4296     # many spaces it would have liked to use.  We may be able
4297     # to recover that space here in the event that all of the
4298     # lines of a list are back together again.
4299     #----------------------------------------------------------
4300
4301     return 0 unless ( @{$rlines} && @{$rgroups} );
4302
4303     my $object = $rlines->[0]->{'indentation'};
4304     return 0 unless ( ref($object) );
4305     my $extra_leading_spaces            = 0;
4306     my $extra_indentation_spaces_wanted = get_recoverable_spaces($object);
4307     return ($extra_leading_spaces) unless ($extra_indentation_spaces_wanted);
4308
4309     my $min_spaces = $extra_indentation_spaces_wanted;
4310     if ( $min_spaces > 0 ) { $min_spaces = 0 }
4311
4312     # loop over all groups
4313     my $ng      = -1;
4314     my $ngroups = @{$rgroups};
4315     foreach my $item ( @{$rgroups} ) {
4316         $ng++;
4317         my ( $jbeg, $jend ) = @{$item};
4318         foreach my $j ( $jbeg .. $jend ) {
4319             next if ( $j == 0 );
4320
4321             # all indentation objects must be the same
4322             if ( $object != $rlines->[$j]->{'indentation'} ) {
4323                 return 0;
4324             }
4325         }
4326
4327        # find the maximum space without exceeding the line length for this group
4328         my $avail = $rlines->[$jbeg]->get_available_space_on_right();
4329         my $spaces =
4330           ( $avail > $extra_indentation_spaces_wanted )
4331           ? $extra_indentation_spaces_wanted
4332           : $avail;
4333
4334         #--------------------------------------------------------
4335         # Note: min spaces can be negative; for example with -gnu
4336         # f(
4337         #   do { 1; !!(my $x = bless []); }
4338         #  );
4339         #--------------------------------------------------------
4340         # The following rule is needed to match older formatting:
4341         # For multiple groups, we will keep spaces non-negative.
4342         # For a single group, we will allow a negative space.
4343         if ( $ngroups > 1 && $spaces < 0 ) { $spaces = 0 }
4344
4345         # update the minimum spacing
4346         if ( $ng == 0 || $spaces < $extra_leading_spaces ) {
4347             $extra_leading_spaces = $spaces;
4348         }
4349     }
4350
4351     # update the indentation object because with -icp the terminal
4352     # ');' will use the same adjustment.
4353     $object->permanently_decrease_available_spaces( -$extra_leading_spaces );
4354     return $extra_leading_spaces;
4355 }
4356
4357 sub forget_side_comment {
4358     my ($self) = @_;
4359     $self->[_last_side_comment_column_] = 0;
4360     return;
4361 }
4362
4363 sub is_good_side_comment_column {
4364     my ( $self, $line, $line_number, $level, $num5 ) = @_;
4365
4366     # Upon encountering the first side comment of a group, decide if
4367     # a previous side comment should be forgotten.  This involves
4368     # checking several rules.
4369
4370     # Return true to KEEP old comment location
4371     # Return false to FORGET old comment location
4372     my $KEEP   = 1;
4373     my $FORGET = 0;
4374
4375     my $rfields                 = $line->{'rfields'};
4376     my $is_hanging_side_comment = $line->{'is_hanging_side_comment'};
4377
4378     # RULE1: Never forget comment before a hanging side comment
4379     return $KEEP if ($is_hanging_side_comment);
4380
4381     # RULE2: Forget a side comment after a short line difference,
4382     # where 'short line difference' is computed from a formula.
4383     # Using a smooth formula helps minimize sudden large changes.
4384     my $line_diff = $line_number - $self->[_last_side_comment_line_number_];
4385     my $alev_diff = abs( $level - $self->[_last_side_comment_level_] );
4386
4387     # '$num5' is the number of comments in the first 5 lines after the first
4388     # comment.  It is needed to keep a compact group of side comments from
4389     # being influenced by a more distant side comment.
4390     $num5 = 1 unless ($num5);
4391
4392     # Some values:
4393
4394     #        $adiff  $num5   $short_diff
4395     #        0       *       12
4396     #        1       1       6
4397     #        1       2       4
4398     #        1       3       3
4399     #        1       4       2
4400     #        2       1       4
4401     #        2       2       2
4402     #        2       3       1
4403     #        3       1       3
4404     #        3       2       1
4405
4406     my $short_diff = SC_LONG_LINE_DIFF / ( 1 + $alev_diff * $num5 );
4407
4408     return $FORGET
4409       if ( $line_diff > $short_diff
4410         || !$self->[_rOpts_valign_side_comments_] );
4411
4412     # RULE3: Forget a side comment if this line is at lower level and
4413     # ends a block
4414     my $last_sc_level = $self->[_last_side_comment_level_];
4415     return $FORGET
4416       if ( $level < $last_sc_level
4417         && $is_closing_block_type{ substr( $rfields->[0], 0, 1 ) } );
4418
4419     # RULE 4: Forget the last side comment if this comment might join a cached
4420     # line ...
4421     if ( my $cached_line_type = get_cached_line_type() ) {
4422
4423         # ... otherwise side comment alignment will get messed up.
4424         # For example, in the following test script
4425         # with using 'perltidy -sct -act=2', the last comment would try to
4426         # align with the previous and then be in the wrong column when
4427         # the lines are combined:
4428
4429         # foreach $line (
4430         #    [0, 1, 2], [3, 4, 5], [6, 7, 8],    # rows
4431         #    [0, 3, 6], [1, 4, 7], [2, 5, 8],    # columns
4432         #    [0, 4, 8], [2, 4, 6]
4433         #  )                                     # diagonals
4434         return $FORGET
4435           if ( $cached_line_type == 2 || $cached_line_type == 4 );
4436     }
4437
4438     # Otherwise, keep it alive
4439     return $KEEP;
4440 }
4441
4442 sub align_side_comments {
4443
4444     my ( $self, $rlines, $rgroups ) = @_;
4445
4446     # Align any side comments in this batch of lines
4447
4448     # Given:
4449     #  $rlines  - the lines
4450     #  $rgroups - the partition of the lines into groups
4451     #
4452     # We will be working group-by-group because all side comments
4453     # (real or fake) in each group are already aligned. So we just have
4454     # to make alignments between groups wherever possible.
4455
4456     # An unusual aspect is that within each group we have aligned both real
4457     # and fake side comments.  This has the consequence that the lengths of
4458     # long lines without real side comments can cause 'push' all side comments
4459     # to the right.  This seems unusual, but testing with and without this
4460     # feature shows that it is usually better this way.  Otherwise, side
4461     # comments can be hidden between long lines without side comments and
4462     # thus be harder to read.
4463
4464     my $group_level        = $self->[_group_level_];
4465     my $continuing_sc_flow = $self->[_last_side_comment_length_] > 0
4466       && $group_level == $self->[_last_level_written_];
4467
4468     # Find groups with side comments, and remember the first nonblank comment
4469     my $j_sc_beg;
4470     my @todo;
4471     my $ng = -1;
4472     foreach my $item ( @{$rgroups} ) {
4473         $ng++;
4474         my ( $jbeg, $jend ) = @{$item};
4475         foreach my $j ( $jbeg .. $jend ) {
4476             my $line = $rlines->[$j];
4477             my $jmax = $line->{'jmax'};
4478             if ( $line->{'rfield_lengths'}->[$jmax] ) {
4479
4480                 # this group has a line with a side comment
4481                 push @todo, $ng;
4482                 if ( !defined($j_sc_beg) ) {
4483                     $j_sc_beg = $j;
4484                 }
4485                 last;
4486             }
4487         }
4488     }
4489
4490     # done if no groups with side comments
4491     return unless @todo;
4492
4493     # Count $num5 = number of comments in the 5 lines after the first comment
4494     # This is an important factor in a decision formula
4495     my $num5 = 1;
4496     foreach my $jj ( $j_sc_beg + 1 .. @{$rlines} - 1 ) {
4497         my $ldiff = $jj - $j_sc_beg;
4498         last if ( $ldiff > 5 );
4499         my $line   = $rlines->[$jj];
4500         my $jmax   = $line->{'jmax'};
4501         my $sc_len = $line->{'rfield_lengths'}->[$jmax];
4502         next unless ($sc_len);
4503         $num5++;
4504     }
4505
4506     # Forget the old side comment location if necessary
4507     my $line_0 = $rlines->[$j_sc_beg];
4508     my $lnum =
4509       $j_sc_beg + $self->[_file_writer_object_]->get_output_line_number();
4510     my $keep_it =
4511       $self->is_good_side_comment_column( $line_0, $lnum, $group_level, $num5 );
4512     my $last_side_comment_column =
4513       $keep_it ? $self->[_last_side_comment_column_] : 0;
4514
4515     # If there are multiple groups we will do two passes
4516     # so that we can find a common alignment for all groups.
4517     my $MAX_PASS = @todo > 1 ? 2 : 1;
4518
4519     # Loop over passes
4520     my $max_comment_column = $last_side_comment_column;
4521     foreach my $PASS ( 1 .. $MAX_PASS ) {
4522
4523         # If there are two passes, then on the last pass make the old column
4524         # equal to the largest of the group.  This will result in the comments
4525         # being aligned if possible.
4526         if ( $PASS == $MAX_PASS ) {
4527             $last_side_comment_column = $max_comment_column;
4528         }
4529
4530         # Loop over the groups with side comments
4531         my $column_limit;
4532         foreach my $ng (@todo) {
4533             my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
4534
4535             # Note that since all lines in a group have common alignments, we
4536             # just have to work on one of the lines (the first line).
4537             my $line                    = $rlines->[$jbeg];
4538             my $jmax                    = $line->{'jmax'};
4539             my $is_hanging_side_comment = $line->{'is_hanging_side_comment'};
4540             last
4541               if ( $PASS < $MAX_PASS && $is_hanging_side_comment );
4542
4543             # the maximum space without exceeding the line length:
4544             my $avail = $line->get_available_space_on_right();
4545
4546             # try to use the previous comment column
4547             my $side_comment_column = $line->get_column( $jmax - 1 );
4548             my $move = $last_side_comment_column - $side_comment_column;
4549
4550             # Remember the maximum possible column of the first line with
4551             # side comment
4552             if ( !defined($column_limit) ) {
4553                 $column_limit = $side_comment_column + $avail;
4554             }
4555
4556             next if ( $jmax <= 0 );
4557
4558             # but if this doesn't work, give up and use the minimum space
4559             my $min_move = $self->[_rOpts_minimum_space_to_comment_] - 1;
4560             if ( $move > $avail ) {
4561                 $move = $min_move;
4562             }
4563
4564             # but we want some minimum space to the comment
4565             if (   $move >= 0
4566                 && $j_sc_beg == 0
4567                 && $continuing_sc_flow )
4568             {
4569                 $min_move = 0;
4570             }
4571
4572             # remove constraints on hanging side comments
4573             if ($is_hanging_side_comment) { $min_move = 0 }
4574
4575             if ( $move < $min_move ) {
4576                 $move = $min_move;
4577             }
4578
4579             # don't exceed the available space
4580             if ( $move > $avail ) { $move = $avail }
4581
4582             # We can only increase space, never decrease.
4583             if ( $move < 0 ) { $move = 0 }
4584
4585             # Discover the largest column on the preliminary  pass
4586             if ( $PASS < $MAX_PASS ) {
4587                 my $col = $line->get_column( $jmax - 1 ) + $move;
4588
4589                 # but ignore columns too large for the starting line
4590                 if ( $col > $max_comment_column && $col < $column_limit ) {
4591                     $max_comment_column = $col;
4592                 }
4593             }
4594
4595             # Make the changes on the final pass
4596             else {
4597                 $line->increase_field_width( $jmax - 1, $move );
4598
4599                 # remember this column for the next group
4600                 $last_side_comment_column = $line->get_column( $jmax - 1 );
4601             }
4602         } ## end loop over groups
4603     } ## end loop over passes
4604
4605     # Find the last side comment
4606     my $j_sc_last;
4607     my $ng_last = $todo[-1];
4608     my ( $jbeg, $jend ) = @{ $rgroups->[$ng_last] };
4609     foreach my $jj ( reverse( $jbeg .. $jend ) ) {
4610         my $line = $rlines->[$jj];
4611         my $jmax = $line->{'jmax'};
4612         if ( $line->{'rfield_lengths'}->[$jmax] ) {
4613             $j_sc_last = $jj;
4614             last;
4615         }
4616     }
4617
4618     # Save final side comment info for possible use by the next batch
4619     if ( defined($j_sc_last) ) {
4620         my $line_number =
4621           $self->[_file_writer_object_]->get_output_line_number() + $j_sc_last;
4622         $self->[_last_side_comment_column_]      = $last_side_comment_column;
4623         $self->[_last_side_comment_line_number_] = $line_number;
4624         $self->[_last_side_comment_level_]       = $group_level;
4625     }
4626     return;
4627 }
4628
4629 ###############################
4630 # CODE SECTION 6: Output Step A
4631 ###############################
4632
4633 sub valign_output_step_A {
4634
4635     #------------------------------------------------------------
4636     # This is Step A in writing vertically aligned lines.
4637     # The line is prepared according to the alignments which have
4638     # been found. Then it is shipped to the next step.
4639     #------------------------------------------------------------
4640
4641     my ( $self, $rinput_hash ) = @_;
4642
4643     my $line                 = $rinput_hash->{line};
4644     my $min_ci_gap           = $rinput_hash->{min_ci_gap};
4645     my $do_not_align         = $rinput_hash->{do_not_align};
4646     my $group_leader_length  = $rinput_hash->{group_leader_length};
4647     my $extra_leading_spaces = $rinput_hash->{extra_leading_spaces};
4648     my $level                = $rinput_hash->{level};
4649     my $maximum_line_length  = $rinput_hash->{maximum_line_length};
4650
4651     my $rfields                   = $line->{'rfields'};
4652     my $rfield_lengths            = $line->{'rfield_lengths'};
4653     my $leading_space_count       = $line->{'leading_space_count'};
4654     my $outdent_long_lines        = $line->{'outdent_long_lines'};
4655     my $maximum_field_index       = $line->{'jmax'};
4656     my $rvertical_tightness_flags = $line->{'rvertical_tightness_flags'};
4657     my $Kend                      = $line->{'Kend'};
4658     my $level_end                 = $line->{'level_end'};
4659
4660     # Check for valid hash keys at end of lifetime of $line during development
4661     DEVEL_MODE
4662       && check_keys( $line, \%valid_LINE_keys,
4663         "Checking line keys at valign_output_step_A", 1 );
4664
4665     # add any extra spaces
4666     if ( $leading_space_count > $group_leader_length ) {
4667         $leading_space_count += $min_ci_gap;
4668     }
4669
4670     my $str     = $rfields->[0];
4671     my $str_len = $rfield_lengths->[0];
4672
4673     my @alignments = @{ $line->{'ralignments'} };
4674     if ( @alignments != $maximum_field_index + 1 ) {
4675
4676         # Shouldn't happen: sub install_new_alignments makes jmax alignments
4677         my $jmax_alignments = @alignments - 1;
4678         if (DEVEL_MODE) {
4679             Fault(
4680 "alignment jmax=$jmax_alignments should equal $maximum_field_index\n"
4681             );
4682         }
4683         $do_not_align = 1;
4684     }
4685
4686     # loop to concatenate all fields of this line and needed padding
4687     my $total_pad_count = 0;
4688     for my $j ( 1 .. $maximum_field_index ) {
4689
4690         # skip zero-length side comments
4691         last
4692           if (
4693             ( $j == $maximum_field_index )
4694             && ( !defined( $rfields->[$j] )
4695                 || ( $rfield_lengths->[$j] == 0 ) )
4696           );
4697
4698         # compute spaces of padding before this field
4699         my $col = $alignments[ $j - 1 ]->{'column'};
4700         my $pad = $col - ( $str_len + $leading_space_count );
4701
4702         if ($do_not_align) {
4703             $pad =
4704               ( $j < $maximum_field_index )
4705               ? 0
4706               : $self->[_rOpts_minimum_space_to_comment_] - 1;
4707         }
4708
4709         # if the -fpsc flag is set, move the side comment to the selected
4710         # column if and only if it is possible, ignoring constraints on
4711         # line length and minimum space to comment
4712         if (   $self->[_rOpts_fixed_position_side_comment_]
4713             && $j == $maximum_field_index )
4714         {
4715             my $newpad =
4716               $pad + $self->[_rOpts_fixed_position_side_comment_] - $col - 1;
4717             if ( $newpad >= 0 ) { $pad = $newpad; }
4718         }
4719
4720         # accumulate the padding
4721         if ( $pad > 0 ) { $total_pad_count += $pad; }
4722
4723         # only add padding when we have a finite field;
4724         # this avoids extra terminal spaces if we have empty fields
4725         if ( $rfield_lengths->[$j] > 0 ) {
4726             $str .= SPACE x $total_pad_count;
4727             $str_len += $total_pad_count;
4728             $total_pad_count = 0;
4729             $str .= $rfields->[$j];
4730             $str_len += $rfield_lengths->[$j];
4731         }
4732         else {
4733             $total_pad_count = 0;
4734         }
4735     }
4736
4737     my $side_comment_length = $rfield_lengths->[$maximum_field_index];
4738
4739     # ship this line off
4740     $self->valign_output_step_B(
4741         {
4742             leading_space_count => $leading_space_count + $extra_leading_spaces,
4743             line                => $str,
4744             line_length         => $str_len,
4745             side_comment_length => $side_comment_length,
4746             outdent_long_lines  => $outdent_long_lines,
4747             rvertical_tightness_flags => $rvertical_tightness_flags,
4748             level                     => $level,
4749             level_end                 => $level_end,
4750             Kend                      => $Kend,
4751             maximum_line_length       => $maximum_line_length,
4752         }
4753     );
4754     return;
4755 }
4756
4757 sub combine_fields {
4758
4759     # We have a group of two lines for which we do not want to align tokens
4760     # between index $imax_align and the side comment.  So we will delete fields
4761     # between $imax_align and the side comment.  Alignments have already
4762     # been set so we have to adjust them.
4763
4764     my ( $line_0, $line_1, $imax_align ) = @_;
4765
4766     if ( !defined($imax_align) ) { $imax_align = -1 }
4767
4768     # First delete the unwanted tokens
4769     my $jmax_old = $line_0->{'jmax'};
4770     my @idel     = ( $imax_align + 1 .. $jmax_old - 2 );
4771     return unless (@idel);
4772
4773     # Get old alignments before any changes are made
4774     my @old_alignments = @{ $line_0->{'ralignments'} };
4775
4776     foreach my $line ( $line_0, $line_1 ) {
4777         delete_selected_tokens( $line, \@idel );
4778     }
4779
4780     # Now adjust the alignments.  Note that the side comment alignment
4781     # is always at jmax-1, and there is an ending alignment at jmax.
4782     my @new_alignments;
4783     if ( $imax_align >= 0 ) {
4784         @new_alignments[ 0 .. $imax_align ] =
4785           @old_alignments[ 0 .. $imax_align ];
4786     }
4787
4788     my $jmax_new = $line_0->{'jmax'};
4789
4790     $new_alignments[ $jmax_new - 1 ] = $old_alignments[ $jmax_old - 1 ];
4791     $new_alignments[$jmax_new]       = $old_alignments[$jmax_old];
4792     $line_0->{'ralignments'}         = \@new_alignments;
4793     $line_1->{'ralignments'}         = \@new_alignments;
4794     return;
4795 }
4796
4797 sub get_output_line_number {
4798
4799     # The output line number reported to a caller =
4800     # the number of items still in the buffer +
4801     # the number of items written.
4802     return $_[0]->group_line_count() +
4803       $_[0]->[_file_writer_object_]->get_output_line_number();
4804 }
4805
4806 ###############################
4807 # CODE SECTION 7: Output Step B
4808 ###############################
4809
4810 {    ## closure for sub valign_output_step_B
4811
4812     # These are values for a cache used by valign_output_step_B.
4813     my $cached_line_text;
4814     my $cached_line_text_length;
4815     my $cached_line_type;
4816     my $cached_line_opening_flag;
4817     my $cached_line_closing_flag;
4818     my $cached_seqno;
4819     my $cached_line_valid;
4820     my $cached_line_leading_space_count;
4821     my $cached_seqno_string;
4822     my $cached_line_Kend;
4823     my $cached_line_maximum_length;
4824
4825     # These are passed to step_C:
4826     my $seqno_string;
4827     my $last_nonblank_seqno_string;
4828
4829     sub set_last_nonblank_seqno_string {
4830         my ($val) = @_;
4831         $last_nonblank_seqno_string = $val;
4832         return;
4833     }
4834
4835     sub get_cached_line_opening_flag {
4836         return $cached_line_opening_flag;
4837     }
4838
4839     sub get_cached_line_type {
4840         return $cached_line_type;
4841     }
4842
4843     sub set_cached_line_valid {
4844         my ($val) = @_;
4845         $cached_line_valid = $val;
4846         return;
4847     }
4848
4849     sub get_cached_seqno {
4850         return $cached_seqno;
4851     }
4852
4853     sub initialize_step_B_cache {
4854
4855         # valign_output_step_B cache:
4856         $cached_line_text                = EMPTY_STRING;
4857         $cached_line_text_length         = 0;
4858         $cached_line_type                = 0;
4859         $cached_line_opening_flag        = 0;
4860         $cached_line_closing_flag        = 0;
4861         $cached_seqno                    = 0;
4862         $cached_line_valid               = 0;
4863         $cached_line_leading_space_count = 0;
4864         $cached_seqno_string             = EMPTY_STRING;
4865         $cached_line_Kend                = undef;
4866         $cached_line_maximum_length      = undef;
4867
4868         # These vars hold a string of sequence numbers joined together used by
4869         # the cache
4870         $seqno_string               = EMPTY_STRING;
4871         $last_nonblank_seqno_string = EMPTY_STRING;
4872         return;
4873     }
4874
4875     sub _flush_cache {
4876         my ($self) = @_;
4877         if ($cached_line_type) {
4878             $seqno_string = $cached_seqno_string;
4879             $self->valign_output_step_C(
4880                 $seqno_string,
4881                 $last_nonblank_seqno_string,
4882
4883                 $cached_line_text,
4884                 $cached_line_leading_space_count,
4885                 $self->[_last_level_written_],
4886                 $cached_line_Kend,
4887             );
4888             $cached_line_type           = 0;
4889             $cached_line_text           = EMPTY_STRING;
4890             $cached_line_text_length    = 0;
4891             $cached_seqno_string        = EMPTY_STRING;
4892             $cached_line_Kend           = undef;
4893             $cached_line_maximum_length = undef;
4894         }
4895         return;
4896     }
4897
4898     sub handle_cached_line {
4899
4900         my ( $self, $rinput, $leading_string, $leading_string_length ) = @_;
4901
4902         # The cached line will either be:
4903         # - written out, or
4904         # - or combined with the current line
4905
4906         my $last_level_written = $self->[_last_level_written_];
4907
4908         my $leading_space_count       = $rinput->{leading_space_count};
4909         my $str                       = $rinput->{line};
4910         my $str_length                = $rinput->{line_length};
4911         my $rvertical_tightness_flags = $rinput->{rvertical_tightness_flags};
4912         my $level                     = $rinput->{level};
4913         my $level_end                 = $rinput->{level_end};
4914         my $maximum_line_length       = $rinput->{maximum_line_length};
4915
4916         my ( $open_or_close, $opening_flag, $closing_flag, $seqno, $valid,
4917             $seqno_beg, $seqno_end );
4918         if ($rvertical_tightness_flags) {
4919
4920             $open_or_close = $rvertical_tightness_flags->{_vt_type};
4921             $seqno_beg     = $rvertical_tightness_flags->{_vt_seqno_beg};
4922         }
4923
4924         # Dump an invalid cached line
4925         if ( !$cached_line_valid ) {
4926             $self->valign_output_step_C(
4927                 $seqno_string,
4928                 $last_nonblank_seqno_string,
4929
4930                 $cached_line_text,
4931                 $cached_line_leading_space_count,
4932                 $last_level_written,
4933                 $cached_line_Kend,
4934             );
4935         }
4936
4937         # Handle cached line ending in OPENING tokens
4938         elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
4939
4940             my $gap = $leading_space_count - $cached_line_text_length;
4941
4942             # handle option of just one tight opening per line:
4943             if ( $cached_line_opening_flag == 1 ) {
4944                 if ( defined($open_or_close) && $open_or_close == 1 ) {
4945                     $gap = -1;
4946                 }
4947             }
4948
4949             # Do not join the lines if this might produce a one-line
4950             # container which exceeds the maximum line length.  This is
4951             # necessary prevent blinking, particularly with the combination
4952             # -xci -pvt=2.  In that case a one-line block alternately forms
4953             # and breaks, causing -xci to alternately turn on and off (case
4954             # b765).
4955             # Patched to fix cases b656 b862 b971 b972: always do the check
4956             # if the maximum line length changes (due to -vmll).
4957             if (
4958                 $gap >= 0
4959                 && ( $maximum_line_length != $cached_line_maximum_length
4960                     || ( defined($level_end) && $level > $level_end ) )
4961               )
4962             {
4963                 my $test_line_length =
4964                   $cached_line_text_length + $gap + $str_length;
4965
4966                 # Add a small tolerance in the length test (fixes case b862)
4967                 if ( $test_line_length > $cached_line_maximum_length - 2 ) {
4968                     $gap = -1;
4969                 }
4970             }
4971
4972             if ( $gap >= 0 && defined($seqno_beg) ) {
4973                 $maximum_line_length   = $cached_line_maximum_length;
4974                 $leading_string        = $cached_line_text . SPACE x $gap;
4975                 $leading_string_length = $cached_line_text_length + $gap;
4976                 $leading_space_count   = $cached_line_leading_space_count;
4977                 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
4978                 $level        = $last_level_written;
4979             }
4980             else {
4981                 $self->valign_output_step_C(
4982                     $seqno_string,
4983                     $last_nonblank_seqno_string,
4984
4985                     $cached_line_text,
4986                     $cached_line_leading_space_count,
4987                     $last_level_written,
4988                     $cached_line_Kend,
4989                 );
4990             }
4991         }
4992
4993         # Handle cached line ending in CLOSING tokens
4994         else {
4995             my $test_line =
4996               $cached_line_text . SPACE x $cached_line_closing_flag . $str;
4997             my $test_line_length =
4998               $cached_line_text_length +
4999               $cached_line_closing_flag +
5000               $str_length;
5001             if (
5002
5003                 # The new line must start with container
5004                 $seqno_beg
5005
5006                 # The container combination must be okay..
5007                 && (
5008
5009                     # okay to combine like types
5010                     ( $open_or_close == $cached_line_type )
5011
5012                     # closing block brace may append to non-block
5013                     || ( $cached_line_type == 2 && $open_or_close == 4 )
5014
5015                     # something like ');'
5016                     || ( !$open_or_close && $cached_line_type == 2 )
5017
5018                 )
5019
5020                 # The combined line must fit
5021                 && ( $test_line_length <= $cached_line_maximum_length )
5022               )
5023             {
5024
5025                 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
5026
5027                 # Patch to outdent closing tokens ending # in ');' If we
5028                 # are joining a line like ');' to a previous stacked set of
5029                 # closing tokens, then decide if we may outdent the
5030                 # combined stack to the indentation of the ');'.  Since we
5031                 # should not normally outdent any of the other tokens more
5032                 # than the indentation of the lines that contained them, we
5033                 # will only do this if all of the corresponding opening
5034                 # tokens were on the same line.  This can happen with -sot
5035                 # and -sct.
5036
5037                 # For example, it is ok here:
5038                 #   __PACKAGE__->load_components( qw(
5039                 #         PK::Auto
5040                 #         Core
5041                 #   ));
5042                 #
5043                 # But, for example, we do not outdent in this example
5044                 # because that would put the closing sub brace out farther
5045                 # than the opening sub brace:
5046                 #
5047                 #   perltidy -sot -sct
5048                 #   $c->Tk::bind(
5049                 #       '<Control-f>' => sub {
5050                 #           my ($c) = @_;
5051                 #           my $e = $c->XEvent;
5052                 #           itemsUnderArea $c;
5053                 #       } );
5054                 #
5055                 if (   $str =~ /^\);/
5056                     && $cached_line_text =~ /^[\)\}\]\s]*$/ )
5057                 {
5058
5059                     # The way to tell this is if the stacked sequence
5060                     # numbers of this output line are the reverse of the
5061                     # stacked sequence numbers of the previous non-blank
5062                     # line of sequence numbers.  So we can join if the
5063                     # previous nonblank string of tokens is the mirror
5064                     # image.  For example if stack )}] is 13:8:6 then we
5065                     # are looking for a leading stack like [{( which
5066                     # is 6:8:13. We only need to check the two ends,
5067                     # because the intermediate tokens must fall in order.
5068                     # Note on speed: having to split on colons and
5069                     # eliminate multiple colons might appear to be slow,
5070                     # but it's not an issue because we almost never come
5071                     # through here.  In a typical file we don't.
5072
5073                     $seqno_string               =~ s/^:+//;
5074                     $last_nonblank_seqno_string =~ s/^:+//;
5075                     $seqno_string               =~ s/:+/:/g;
5076                     $last_nonblank_seqno_string =~ s/:+/:/g;
5077
5078                     # how many spaces can we outdent?
5079                     my $diff =
5080                       $cached_line_leading_space_count - $leading_space_count;
5081                     if (   $diff > 0
5082                         && length($seqno_string)
5083                         && length($last_nonblank_seqno_string) ==
5084                         length($seqno_string) )
5085                     {
5086                         my @seqno_last =
5087                           ( split /:/, $last_nonblank_seqno_string );
5088                         my @seqno_now = ( split /:/, $seqno_string );
5089                         if (   @seqno_now
5090                             && @seqno_last
5091                             && $seqno_now[-1] == $seqno_last[0]
5092                             && $seqno_now[0] == $seqno_last[-1] )
5093                         {
5094
5095                             # OK to outdent ..
5096                             # for absolute safety, be sure we only remove
5097                             # whitespace
5098                             my $ws = substr( $test_line, 0, $diff );
5099                             if ( ( length($ws) == $diff )
5100                                 && $ws =~ /^\s+$/ )
5101                             {
5102
5103                                 $test_line = substr( $test_line, $diff );
5104                                 $cached_line_leading_space_count -= $diff;
5105                                 $last_level_written =
5106                                   $self->level_change(
5107                                     $cached_line_leading_space_count,
5108                                     $diff, $last_level_written );
5109                                 $self->reduce_valign_buffer_indentation($diff);
5110                             }
5111
5112                             # shouldn't happen, but not critical:
5113                             ##else {
5114                             ## ERROR transferring indentation here
5115                             ##}
5116                         }
5117                     }
5118                 }
5119
5120                 # Change the args to look like we received the combined line
5121                 $str                   = $test_line;
5122                 $str_length            = $test_line_length;
5123                 $leading_string        = EMPTY_STRING;
5124                 $leading_string_length = 0;
5125                 $leading_space_count   = $cached_line_leading_space_count;
5126                 $level                 = $last_level_written;
5127                 $maximum_line_length   = $cached_line_maximum_length;
5128             }
5129             else {
5130                 $self->valign_output_step_C(
5131                     $seqno_string,
5132                     $last_nonblank_seqno_string,
5133
5134                     $cached_line_text,
5135                     $cached_line_leading_space_count,
5136                     $last_level_written,
5137                     $cached_line_Kend,
5138                 );
5139             }
5140         }
5141         return ( $str, $str_length, $leading_string, $leading_string_length,
5142             $leading_space_count, $level, $maximum_line_length, );
5143
5144     } ## end sub handle_cached_line
5145
5146     sub valign_output_step_B {
5147
5148         #---------------------------------------------------------
5149         # This is Step B in writing vertically aligned lines.
5150         # Vertical tightness is applied according to preset flags.
5151         # In particular this routine handles stacking of opening
5152         # and closing tokens.
5153         #---------------------------------------------------------
5154
5155         my ( $self, $rinput ) = @_;
5156
5157         my $leading_space_count       = $rinput->{leading_space_count};
5158         my $str                       = $rinput->{line};
5159         my $str_length                = $rinput->{line_length};
5160         my $side_comment_length       = $rinput->{side_comment_length};
5161         my $outdent_long_lines        = $rinput->{outdent_long_lines};
5162         my $rvertical_tightness_flags = $rinput->{rvertical_tightness_flags};
5163         my $level                     = $rinput->{level};
5164         my $level_end                 = $rinput->{level_end};
5165         my $Kend                      = $rinput->{Kend};
5166         my $maximum_line_length       = $rinput->{maximum_line_length};
5167
5168         # Useful -gcs test cases for wide characters are
5169         # perl527/(method.t.2, reg_mesg.t, mime-header.t)
5170
5171         # handle outdenting of long lines:
5172         my $is_outdented_line;
5173         if ($outdent_long_lines) {
5174             my $excess =
5175               $str_length -
5176               $side_comment_length +
5177               $leading_space_count -
5178               $maximum_line_length;
5179             if ( $excess > 0 ) {
5180                 $leading_space_count = 0;
5181                 my $file_writer_object = $self->[_file_writer_object_];
5182                 my $last_outdented_line_at =
5183                   $file_writer_object->get_output_line_number();
5184                 $self->[_last_outdented_line_at_] = $last_outdented_line_at;
5185
5186                 my $outdented_line_count = $self->[_outdented_line_count_];
5187                 unless ($outdented_line_count) {
5188                     $self->[_first_outdented_line_at_] =
5189                       $last_outdented_line_at;
5190                 }
5191                 $outdented_line_count++;
5192                 $self->[_outdented_line_count_] = $outdented_line_count;
5193                 $is_outdented_line = 1;
5194             }
5195         }
5196
5197         # Make preliminary leading whitespace.  It could get changed
5198         # later by entabbing, so we have to keep track of any changes
5199         # to the leading_space_count from here on.
5200         my $leading_string =
5201           $leading_space_count > 0
5202           ? ( SPACE x $leading_space_count )
5203           : EMPTY_STRING;
5204         my $leading_string_length = length($leading_string);
5205
5206         # Unpack any recombination data; it was packed by
5207         # sub 'Formatter::set_vertical_tightness_flags'
5208
5209         # old   hash              Meaning
5210         # index key
5211         #
5212         # 0   _vt_type:           1=opening non-block    2=closing non-block
5213         #                         3=opening block brace  4=closing block brace
5214         #
5215         # 1a  _vt_opening_flag:  1=no multiple steps, 2=multiple steps ok
5216         # 1b  _vt_closing_flag:    spaces of padding to use if closing
5217         # 2   _vt_seqno:          sequence number of container
5218         # 3   _vt_valid flag:     do not append if this flag is false. Will be
5219         #           true if appropriate -vt flag is set.  Otherwise, Will be
5220         #           made true only for 2 line container in parens with -lp
5221         # 4   _vt_seqno_beg:      sequence number of first token of line
5222         # 5   _vt_seqno_end:      sequence number of last token of line
5223         # 6   _vt_min_lines:      min number of lines for joining opening cache,
5224         #                           0=no constraint
5225         # 7   _vt_max_lines:      max number of lines for joining opening cache,
5226         #                           0=no constraint
5227
5228         my ( $open_or_close, $opening_flag, $closing_flag, $seqno, $valid,
5229             $seqno_beg, $seqno_end );
5230         if ($rvertical_tightness_flags) {
5231
5232             $open_or_close = $rvertical_tightness_flags->{_vt_type};
5233             $opening_flag  = $rvertical_tightness_flags->{_vt_opening_flag};
5234             $closing_flag  = $rvertical_tightness_flags->{_vt_closing_flag};
5235             $seqno         = $rvertical_tightness_flags->{_vt_seqno};
5236             $valid         = $rvertical_tightness_flags->{_vt_valid_flag};
5237             $seqno_beg     = $rvertical_tightness_flags->{_vt_seqno_beg};
5238             $seqno_end     = $rvertical_tightness_flags->{_vt_seqno_end};
5239         }
5240
5241         $seqno_string = $seqno_end;
5242
5243         # handle any cached line ..
5244         # either append this line to it or write it out
5245         # Note: the function length() is used in this next test out of caution.
5246         # All testing has shown that the variable $cached_line_text_length is
5247         # correct, but its calculation is complex and a loss of cached text
5248         # would be a disaster.
5249         if ( length($cached_line_text) ) {
5250
5251             (
5252                 $str,
5253                 $str_length,
5254                 $leading_string,
5255                 $leading_string_length,
5256                 $leading_space_count,
5257                 $level,
5258                 $maximum_line_length
5259
5260             ) = $self->handle_cached_line( $rinput, $leading_string,
5261                 $leading_string_length );
5262
5263             $cached_line_type           = 0;
5264             $cached_line_text           = EMPTY_STRING;
5265             $cached_line_text_length    = 0;
5266             $cached_line_Kend           = undef;
5267             $cached_line_maximum_length = undef;
5268
5269         }
5270
5271         # make the line to be written
5272         my $line        = $leading_string . $str;
5273         my $line_length = $leading_string_length + $str_length;
5274
5275         # Safety check: be sure that a line to be cached as a stacked block
5276         # brace line ends in the appropriate opening or closing block brace.
5277         # This should always be the case if the caller set flags correctly.
5278         # Code '3' is for -sobb, code '4' is for -scbb.
5279         if ($open_or_close) {
5280             if (   $open_or_close == 3 && $line !~ /\{\s*$/
5281                 || $open_or_close == 4 && $line !~ /\}\s*$/ )
5282             {
5283                 $open_or_close = 0;
5284             }
5285         }
5286
5287         # write or cache this line ...
5288         # fix for case b999: do not cache an outdented line
5289         # fix for b1378: do not cache an empty line
5290         if (  !$open_or_close
5291             || $side_comment_length > 0
5292             || $is_outdented_line
5293             || !$line_length )
5294         {
5295             $self->valign_output_step_C(
5296                 $seqno_string,
5297                 $last_nonblank_seqno_string,
5298
5299                 $line,
5300                 $leading_space_count,
5301                 $level,
5302                 $Kend,
5303             );
5304         }
5305         else {
5306             $cached_line_text                = $line;
5307             $cached_line_text_length         = $line_length;
5308             $cached_line_type                = $open_or_close;
5309             $cached_line_opening_flag        = $opening_flag;
5310             $cached_line_closing_flag        = $closing_flag;
5311             $cached_seqno                    = $seqno;
5312             $cached_line_valid               = $valid;
5313             $cached_line_leading_space_count = $leading_space_count;
5314             $cached_seqno_string             = $seqno_string;
5315             $cached_line_Kend                = $Kend;
5316             $cached_line_maximum_length      = $maximum_line_length;
5317         }
5318
5319         $self->[_last_level_written_]       = $level;
5320         $self->[_last_side_comment_length_] = $side_comment_length;
5321         return;
5322     }
5323 }
5324
5325 ###############################
5326 # CODE SECTION 8: Output Step C
5327 ###############################
5328
5329 {    ## closure for sub valign_output_step_C
5330
5331     # Vertical alignment buffer used by valign_output_step_C
5332     my $valign_buffer_filling;
5333     my @valign_buffer;
5334
5335     sub initialize_valign_buffer {
5336         @valign_buffer         = ();
5337         $valign_buffer_filling = EMPTY_STRING;
5338         return;
5339     }
5340
5341     sub dump_valign_buffer {
5342         my ($self) = @_;
5343         if (@valign_buffer) {
5344             foreach (@valign_buffer) {
5345                 $self->valign_output_step_D( @{$_} );
5346             }
5347             @valign_buffer = ();
5348         }
5349         $valign_buffer_filling = EMPTY_STRING;
5350         return;
5351     }
5352
5353     sub reduce_valign_buffer_indentation {
5354
5355         my ( $self, $diff ) = @_;
5356         if ( $valign_buffer_filling && $diff ) {
5357             my $max_valign_buffer = @valign_buffer;
5358             foreach my $i ( 0 .. $max_valign_buffer - 1 ) {
5359                 my ( $line, $leading_space_count, $level, $Kend ) =
5360                   @{ $valign_buffer[$i] };
5361                 my $ws = substr( $line, 0, $diff );
5362                 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
5363                     $line = substr( $line, $diff );
5364                 }
5365                 if ( $leading_space_count >= $diff ) {
5366                     $leading_space_count -= $diff;
5367                     $level =
5368                       $self->level_change( $leading_space_count, $diff,
5369                         $level );
5370                 }
5371                 $valign_buffer[$i] =
5372                   [ $line, $leading_space_count, $level, $Kend ];
5373             }
5374         }
5375         return;
5376     }
5377
5378     sub valign_output_step_C {
5379
5380         #-----------------------------------------------------------------------
5381         # This is Step C in writing vertically aligned lines.
5382         # Lines are either stored in a buffer or passed along to the next step.
5383         # The reason for storing lines is that we may later want to reduce their
5384         # indentation when -sot and -sct are both used.
5385         #-----------------------------------------------------------------------
5386         my (
5387             $self,
5388             $seqno_string,
5389             $last_nonblank_seqno_string,
5390
5391             @args_to_D,
5392         ) = @_;
5393
5394         # Dump any saved lines if we see a line with an unbalanced opening or
5395         # closing token.
5396         $self->dump_valign_buffer()
5397           if ( $seqno_string && $valign_buffer_filling );
5398
5399         # Either store or write this line
5400         if ($valign_buffer_filling) {
5401             push @valign_buffer, [@args_to_D];
5402         }
5403         else {
5404             $self->valign_output_step_D(@args_to_D);
5405         }
5406
5407         # For lines starting or ending with opening or closing tokens..
5408         if ($seqno_string) {
5409             $last_nonblank_seqno_string = $seqno_string;
5410             set_last_nonblank_seqno_string($seqno_string);
5411
5412             # Start storing lines when we see a line with multiple stacked
5413             # opening tokens.
5414             # patch for RT #94354, requested by Colin Williams
5415             if (   index( $seqno_string, ':' ) >= 0
5416                 && $seqno_string =~ /^\d+(\:+\d+)+$/
5417                 && $args_to_D[0] !~ /^[\}\)\]\:\?]/ )
5418             {
5419
5420                 # This test is efficient but a little subtle: The first test
5421                 # says that we have multiple sequence numbers and hence
5422                 # multiple opening or closing tokens in this line.  The second
5423                 # part of the test rejects stacked closing and ternary tokens.
5424                 # So if we get here then we should have stacked unbalanced
5425                 # opening tokens.
5426
5427                 # Here is a complex example:
5428
5429                 # Foo($Bar[0], {  # (side comment)
5430                 #     baz => 1,
5431                 # });
5432
5433                 # The first line has sequence 6::4.  It does not begin with
5434                 # a closing token or ternary, so it passes the test and must be
5435                 # stacked opening tokens.
5436
5437                 # The last line has sequence 4:6 but is a stack of closing
5438                 # tokens, so it gets rejected.
5439
5440                 # Note that the sequence number of an opening token for a qw
5441                 # quote is a negative number and will be rejected.  For
5442                 # example, for the following line: skip_symbols([qw(
5443                 # $seqno_string='10:5:-1'.  It would be okay to accept it but I
5444                 # decided not to do this after testing.
5445
5446                 $valign_buffer_filling = $seqno_string;
5447
5448             }
5449         }
5450         return;
5451     }
5452 }
5453
5454 ###############################
5455 # CODE SECTION 9: Output Step D
5456 ###############################
5457
5458 sub valign_output_step_D {
5459
5460     #----------------------------------------------------------------
5461     # This is Step D in writing vertically aligned lines.
5462     # It is the end of the vertical alignment pipeline.
5463     # Write one vertically aligned line of code to the output object.
5464     #----------------------------------------------------------------
5465
5466     my ( $self, $line, $leading_space_count, $level, $Kend ) = @_;
5467
5468     # The line is currently correct if there is no tabbing (recommended!)
5469     # We may have to lop off some leading spaces and replace with tabs.
5470     if ( $leading_space_count > 0 ) {
5471
5472         my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
5473         my $rOpts_tabs           = $self->[_rOpts_tabs_];
5474         my $rOpts_entab_leading_whitespace =
5475           $self->[_rOpts_entab_leading_whitespace_];
5476
5477         # Nothing to do if no tabs
5478         if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
5479             || $rOpts_indent_columns <= 0 )
5480         {
5481
5482             # nothing to do
5483         }
5484
5485         # Handle entab option
5486         elsif ($rOpts_entab_leading_whitespace) {
5487
5488             # Patch 12-nov-2018 based on report from Glenn. Extra padding was
5489             # not correctly entabbed, nor were side comments: Increase leading
5490             # space count for a padded line to get correct tabbing
5491             if ( $line =~ /^(\s+)(.*)$/ ) {
5492                 my $spaces = length($1);
5493                 if ( $spaces > $leading_space_count ) {
5494                     $leading_space_count = $spaces;
5495                 }
5496             }
5497
5498             my $space_count =
5499               $leading_space_count % $rOpts_entab_leading_whitespace;
5500             my $tab_count =
5501               int( $leading_space_count / $rOpts_entab_leading_whitespace );
5502             my $leading_string = "\t" x $tab_count . SPACE x $space_count;
5503             if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
5504                 substr( $line, 0, $leading_space_count ) = $leading_string;
5505             }
5506             else {
5507
5508                 # shouldn't happen - program error counting whitespace
5509                 # - skip entabbing
5510                 DEBUG_TABS
5511                   && warning(
5512 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
5513                   );
5514             }
5515         }
5516
5517         # Handle option of one tab per level
5518         else {
5519             my $leading_string = ( "\t" x $level );
5520             my $space_count =
5521               $leading_space_count - $level * $rOpts_indent_columns;
5522
5523             # shouldn't happen:
5524             if ( $space_count < 0 ) {
5525
5526                 # But it could be an outdented comment
5527                 if ( $line !~ /^\s*#/ ) {
5528                     DEBUG_TABS
5529                       && warning(
5530 "Error entabbing in valign_output_step_D: for level=$level count=$leading_space_count\n"
5531                       );
5532                 }
5533                 $leading_string = ( SPACE x $leading_space_count );
5534             }
5535             else {
5536                 $leading_string .= ( SPACE x $space_count );
5537             }
5538             if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
5539                 substr( $line, 0, $leading_space_count ) = $leading_string;
5540             }
5541             else {
5542
5543                 # shouldn't happen - program error counting whitespace
5544                 # we'll skip entabbing
5545                 DEBUG_TABS
5546                   && warning(
5547 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
5548                   );
5549             }
5550         }
5551     }
5552     my $file_writer_object = $self->[_file_writer_object_];
5553     $file_writer_object->write_code_line( $line . "\n", $Kend );
5554
5555     return;
5556 }
5557
5558 {    ## closure for sub get_leading_string
5559
5560     my @leading_string_cache;
5561
5562     sub initialize_leading_string_cache {
5563         @leading_string_cache = ();
5564         return;
5565     }
5566
5567     sub get_leading_string {
5568
5569         # define the leading whitespace string for this line..
5570         my ( $self, $leading_whitespace_count, $group_level ) = @_;
5571
5572         # Handle case of zero whitespace, which includes multi-line quotes
5573         # (which may have a finite level; this prevents tab problems)
5574         if ( $leading_whitespace_count <= 0 ) {
5575             return EMPTY_STRING;
5576         }
5577
5578         # look for previous result
5579         elsif ( $leading_string_cache[$leading_whitespace_count] ) {
5580             return $leading_string_cache[$leading_whitespace_count];
5581         }
5582
5583         # must compute a string for this number of spaces
5584         my $leading_string;
5585
5586         # Handle simple case of no tabs
5587         my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
5588         my $rOpts_tabs           = $self->[_rOpts_tabs_];
5589         my $rOpts_entab_leading_whitespace =
5590           $self->[_rOpts_entab_leading_whitespace_];
5591
5592         if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
5593             || $rOpts_indent_columns <= 0 )
5594         {
5595             $leading_string = ( SPACE x $leading_whitespace_count );
5596         }
5597
5598         # Handle entab option
5599         elsif ($rOpts_entab_leading_whitespace) {
5600             my $space_count =
5601               $leading_whitespace_count % $rOpts_entab_leading_whitespace;
5602             my $tab_count = int(
5603                 $leading_whitespace_count / $rOpts_entab_leading_whitespace );
5604             $leading_string = "\t" x $tab_count . SPACE x $space_count;
5605         }
5606
5607         # Handle option of one tab per level
5608         else {
5609             $leading_string = ( "\t" x $group_level );
5610             my $space_count =
5611               $leading_whitespace_count - $group_level * $rOpts_indent_columns;
5612
5613             # shouldn't happen:
5614             if ( $space_count < 0 ) {
5615                 DEBUG_TABS
5616                   && warning(
5617 "Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n"
5618                   );
5619
5620                 # -- skip entabbing
5621                 $leading_string = ( SPACE x $leading_whitespace_count );
5622             }
5623             else {
5624                 $leading_string .= ( SPACE x $space_count );
5625             }
5626         }
5627         $leading_string_cache[$leading_whitespace_count] = $leading_string;
5628         return $leading_string;
5629     }
5630 } ## end get_leading_string
5631
5632 ##########################
5633 # CODE SECTION 10: Summary
5634 ##########################
5635
5636 sub report_anything_unusual {
5637     my $self = shift;
5638
5639     my $outdented_line_count = $self->[_outdented_line_count_];
5640     if ( $outdented_line_count > 0 ) {
5641         write_logfile_entry(
5642             "$outdented_line_count long lines were outdented:\n");
5643         my $first_outdented_line_at = $self->[_first_outdented_line_at_];
5644         write_logfile_entry(
5645             "  First at output line $first_outdented_line_at\n");
5646
5647         if ( $outdented_line_count > 1 ) {
5648             my $last_outdented_line_at = $self->[_last_outdented_line_at_];
5649             write_logfile_entry(
5650                 "   Last at output line $last_outdented_line_at\n");
5651         }
5652         write_logfile_entry(
5653             "  use -noll to prevent outdenting, -l=n to increase line length\n"
5654         );
5655         write_logfile_entry("\n");
5656     }
5657     return;
5658 }
5659 1;