lib/Perl/Tidy/VerticalAligner.pm

   1 package Perl::Tidy::VerticalAligner;
   2 use strict;
   3 use warnings;
   4 use Carp;
   5 use English qw( -no_match_vars );
   6 our $VERSION = '20230309';
   7 use Perl::Tidy::VerticalAligner::Alignment;
   8 use Perl::Tidy::VerticalAligner::Line;
   9
  10 use constant DEVEL_MODE   => 0;
  11 use constant EMPTY_STRING => q{};
  12 use constant SPACE        => q{ };
  13
  14 # The Perl::Tidy::VerticalAligner package collects output lines and
  15 # attempts to line up certain common tokens, such as => and #, which are
  16 # identified by the calling routine.
  17 #
  18 # Usage:
  19 #   - Initiate an object with a call to new().
  20 #   - Write lines one-by-one with calls to valign_input().
  21 #   - Make a final call to flush() to empty the pipeline.
  22 #
  23 # The sub valign_input collects lines into groups.  When a group reaches
  24 # the maximum possible size it is processed for alignment and output.
  25 # The maximum group size is reached whenever there is a change in indentation
  26 # level, a blank line, a block comment, or an external flush call.  The calling
  27 # routine may also force a break in alignment at any time.
  28 #
  29 # If the calling routine needs to interrupt the output and send other text to
  30 # the output, it must first call flush() to empty the output pipeline.  This
  31 # might occur for example if a block of pod text needs to be sent to the output
  32 # between blocks of code.
  33
  34 # It is essential that a final call to flush() be made. Otherwise some
  35 # final lines of text will be lost.
  36
  37 # Index...
  38 # CODE SECTION 1: Preliminary code, global definitions and sub new
  39 #                 sub new
  40 # CODE SECTION 2: Some Basic Utilities
  41 # CODE SECTION 3: Code to accept input and form groups
  42 #                 sub valign_input
  43 # CODE SECTION 4: Code to process comment lines
  44 #                 sub _flush_comment_lines
  45 # CODE SECTION 5: Code to process groups of code lines
  46 #                 sub _flush_group_lines
  47 # CODE SECTION 6: Output Step A
  48 #                 sub valign_output_step_A
  49 # CODE SECTION 7: Output Step B
  50 #                 sub valign_output_step_B
  51 # CODE SECTION 8: Output Step C
  52 #                 sub valign_output_step_C
  53 # CODE SECTION 9: Output Step D
  54 #                 sub valign_output_step_D
  55 # CODE SECTION 10: Summary
  56 #                 sub report_anything_unusual
  57
  58 ##################################################################
  59 # CODE SECTION 1: Preliminary code, global definitions and sub new
  60 ##################################################################
  61
  62 sub AUTOLOAD {
  63
  64     # Catch any undefined sub calls so that we are sure to get
  65     # some diagnostic information.  This sub should never be called
  66     # except for a programming error.
  67     our $AUTOLOAD;
  68     return if ( $AUTOLOAD =~ /\bDESTROY$/ );
  69     my ( $pkg, $fname, $lno ) = caller();
  70     my $my_package = __PACKAGE__;
  71     print STDERR <<EOM;
  72 ======================================================================
  73 Error detected in package '$my_package', version $VERSION
  74 Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
  75 Called from package: '$pkg'
  76 Called from File '$fname'  at line '$lno'
  77 This error is probably due to a recent programming change
  78 ======================================================================
  79 EOM
  80     exit 1;
  81 } ## end sub AUTOLOAD
  82
  83 sub DESTROY {
  84
  85     # required to avoid call to AUTOLOAD in some versions of perl
  86 }
  87
  88 sub Die {
  89     my ($msg) = @_;
  90     Perl::Tidy::Die($msg);
  91     croak "unexpected return from Perl::Tidy::Die";
  92 }
  93
  94 sub Fault {
  95     my ($msg) = @_;
  96
  97     # This routine is called for errors that really should not occur
  98     # except if there has been a bug introduced by a recent program change.
  99     # Please add comments at calls to Fault to explain why the call
 100     # should not occur, and where to look to fix it.
 101     my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
 102     my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
 103     my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
 104     my $pkg = __PACKAGE__;
 105
 106     my $input_stream_name = get_input_stream_name();
 107
 108     Die(<<EOM);
 109 ==============================================================================
 110 While operating on input stream with name: '$input_stream_name'
 111 A fault was detected at line $line0 of sub '$subroutine1'
 112 in file '$filename1'
 113 which was called from line $line1 of sub '$subroutine2'
 114 Message: '$msg'
 115 This is probably an error introduced by a recent programming change.
 116 $pkg reports VERSION='$VERSION'.
 117 ==============================================================================
 118 EOM
 119
 120     # We shouldn't get here, but this return is to keep Perl-Critic from
 121     # complaining.
 122     return;
 123 } ## end sub Fault
 124
 125 my %valid_LINE_keys;
 126
 127 BEGIN {
 128
 129     # define valid keys in a line object
 130     my @q = qw(
 131       jmax
 132       rtokens
 133       rfields
 134       rfield_lengths
 135       rpatterns
 136       indentation
 137       leading_space_count
 138       outdent_long_lines
 139       list_type
 140       list_seqno
 141       is_hanging_side_comment
 142       maximum_line_length
 143       rvertical_tightness_flags
 144       is_terminal_ternary
 145       j_terminal_match
 146       end_group
 147       Kend
 148       ci_level
 149       level
 150       level_end
 151       imax_pair
 152
 153       ralignments
 154     );
 155
 156     @valid_LINE_keys{@q} = (1) x scalar(@q);
 157 } ## end BEGIN
 158
 159 BEGIN {
 160
 161     # Define the fixed indexes for variables in $self, which is an array
 162     # reference.  Note the convention of leading and trailing underscores to
 163     # keep them unique.
 164     # Do not combine with other BEGIN blocks (c101).
 165     my $i = 0;
 166     use constant {
 167         _file_writer_object_ => $i++,
 168         _logger_object_      => $i++,
 169         _diagnostics_object_ => $i++,
 170         _length_function_    => $i++,
 171
 172         _rOpts_                             => $i++,
 173         _rOpts_indent_columns_              => $i++,
 174         _rOpts_tabs_                        => $i++,
 175         _rOpts_entab_leading_whitespace_    => $i++,
 176         _rOpts_fixed_position_side_comment_ => $i++,
 177         _rOpts_minimum_space_to_comment_    => $i++,
 178         _rOpts_valign_code_                 => $i++,
 179         _rOpts_valign_block_comments_       => $i++,
 180         _rOpts_valign_side_comments_        => $i++,
 181
 182         _last_level_written_            => $i++,
 183         _last_side_comment_column_      => $i++,
 184         _last_side_comment_line_number_ => $i++,
 185         _last_side_comment_length_      => $i++,
 186         _last_side_comment_level_       => $i++,
 187         _outdented_line_count_          => $i++,
 188         _first_outdented_line_at_       => $i++,
 189         _last_outdented_line_at_        => $i++,
 190         _consecutive_block_comments_    => $i++,
 191
 192         _rgroup_lines_                => $i++,
 193         _group_level_                 => $i++,
 194         _group_type_                  => $i++,
 195         _group_maximum_line_length_   => $i++,
 196         _zero_count_                  => $i++,
 197         _last_leading_space_count_    => $i++,
 198         _comment_leading_space_count_ => $i++,
 199     };
 200
 201     # Debug flag. This is a relic from the original program development
 202     # looking for problems with tab characters.  Caution: this debug flag can
 203     # produce a lot of output It should be 0 except when debugging small
 204     # scripts.
 205
 206     use constant DEBUG_TABS => 0;
 207
 208     my $debug_warning = sub {
 209         print STDOUT "VALIGN_DEBUGGING with key $_[0]\n";
 210         return;
 211     };
 212
 213     DEBUG_TABS && $debug_warning->('TABS');
 214 } ## end BEGIN
 215
 216 # GLOBAL variables
 217 my (
 218
 219     %valign_control_hash,
 220     $valign_control_default,
 221
 222 );
 223
 224 sub check_options {
 225
 226     # This routine is called to check the user-supplied run parameters
 227     # and to configure the control hashes to them.
 228     my ($rOpts) = @_;
 229
 230     # All alignments are done by default
 231     %valign_control_hash    = ();
 232     $valign_control_default = 1;
 233
 234     # If -vil=s is entered without -vxl, assume -vxl='*'
 235     if (  !$rOpts->{'valign-exclusion-list'}
 236         && $rOpts->{'valign-inclusion-list'} )
 237     {
 238         $rOpts->{'valign-exclusion-list'} = '*';
 239     }
 240
 241     # See if the user wants to exclude any alignment types ...
 242     if ( $rOpts->{'valign-exclusion-list'} ) {
 243
 244         # The inclusion list is only relevant if there is an exclusion list
 245         if ( $rOpts->{'valign-inclusion-list'} ) {
 246             my @vil = split /\s+/, $rOpts->{'valign-inclusion-list'};
 247             @valign_control_hash{@vil} = (1) x scalar(@vil);
 248         }
 249
 250         # Note that the -vxl list is done after -vil, so -vxl has priority
 251         # in the event of duplicate entries.
 252         my @vxl = split /\s+/, $rOpts->{'valign-exclusion-list'};
 253         @valign_control_hash{@vxl} = (0) x scalar(@vxl);
 254
 255         # Optimization: revert to defaults if no exclusions.
 256         # This could happen with -vxl='  ' and any -vil list
 257         if ( !@vxl ) {
 258             %valign_control_hash = ();
 259         }
 260
 261         # '$valign_control_default' applies to types not in the hash:
 262         # - If a '*' was entered then set it to be that default type
 263         # - Otherwise, leave it set it to 1
 264         if ( defined( $valign_control_hash{'*'} ) ) {
 265             $valign_control_default = $valign_control_hash{'*'};
 266         }
 267
 268         # Side comments are controlled separately and must be removed
 269         # if given in a list.
 270         if (%valign_control_hash) {
 271             $valign_control_hash{'#'} = 1;
 272         }
 273     }
 274
 275     return;
 276 } ## end sub check_options
 277
 278 sub check_keys {
 279     my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
 280
 281     # Check the keys of a hash:
 282     # $rtest   = ref to hash to test
 283     # $rvalid  = ref to hash with valid keys
 284
 285     # $msg = a message to write in case of error
 286     # $exact_match defines the type of check:
 287     #     = false: test hash must not have unknown key
 288     #     = true:  test hash must have exactly same keys as known hash
 289     my @unknown_keys =
 290       grep { !exists $rvalid->{$_} } keys %{$rtest};
 291     my @missing_keys =
 292       grep { !exists $rtest->{$_} } keys %{$rvalid};
 293     my $error = @unknown_keys;
 294     if ($exact_match) { $error ||= @missing_keys }
 295     if ($error) {
 296         local $LIST_SEPARATOR = ')(';
 297         my @expected_keys = sort keys %{$rvalid};
 298         @unknown_keys = sort @unknown_keys;
 299         Fault(<<EOM);
 300 ------------------------------------------------------------------------
 301 Program error detected checking hash keys
 302 Message is: '$msg'
 303 Expected keys: (@expected_keys)
 304 Unknown key(s): (@unknown_keys)
 305 Missing key(s): (@missing_keys)
 306 ------------------------------------------------------------------------
 307 EOM
 308     }
 309     return;
 310 } ## end sub check_keys
 311
 312 sub new {
 313
 314     my ( $class, @args ) = @_;
 315
 316     my %defaults = (
 317         rOpts              => undef,
 318         file_writer_object => undef,
 319         logger_object      => undef,
 320         diagnostics_object => undef,
 321         length_function    => sub { return length( $_[0] ) },
 322     );
 323     my %args = ( %defaults, @args );
 324
 325     # Initialize other caches and buffers
 326     initialize_step_B_cache();
 327     initialize_valign_buffer();
 328     initialize_leading_string_cache();
 329     initialize_decode();
 330     set_logger_object( $args{logger_object} );
 331
 332     # Initialize all variables in $self.
 333     # To add an item to $self, first define a new constant index in the BEGIN
 334     # section.
 335     my $self = [];
 336
 337     # objects
 338     $self->[_file_writer_object_] = $args{file_writer_object};
 339     $self->[_logger_object_]      = $args{logger_object};
 340     $self->[_diagnostics_object_] = $args{diagnostics_object};
 341     $self->[_length_function_]    = $args{length_function};
 342
 343     # shortcuts to user options
 344     my $rOpts = $args{rOpts};
 345
 346     $self->[_rOpts_]                = $rOpts;
 347     $self->[_rOpts_indent_columns_] = $rOpts->{'indent-columns'};
 348     $self->[_rOpts_tabs_]           = $rOpts->{'tabs'};
 349     $self->[_rOpts_entab_leading_whitespace_] =
 350       $rOpts->{'entab-leading-whitespace'};
 351     $self->[_rOpts_fixed_position_side_comment_] =
 352       $rOpts->{'fixed-position-side-comment'};
 353     $self->[_rOpts_minimum_space_to_comment_] =
 354       $rOpts->{'minimum-space-to-comment'};
 355     $self->[_rOpts_valign_code_]           = $rOpts->{'valign-code'};
 356     $self->[_rOpts_valign_block_comments_] = $rOpts->{'valign-block-comments'};
 357     $self->[_rOpts_valign_side_comments_]  = $rOpts->{'valign-side-comments'};
 358
 359     # Batch of lines being collected
 360     $self->[_rgroup_lines_]                = [];
 361     $self->[_group_level_]                 = 0;
 362     $self->[_group_type_]                  = EMPTY_STRING;
 363     $self->[_group_maximum_line_length_]   = undef;
 364     $self->[_zero_count_]                  = 0;
 365     $self->[_comment_leading_space_count_] = 0;
 366     $self->[_last_leading_space_count_]    = 0;
 367
 368     # Memory of what has been processed
 369     $self->[_last_level_written_]            = -1;
 370     $self->[_last_side_comment_column_]      = 0;
 371     $self->[_last_side_comment_line_number_] = 0;
 372     $self->[_last_side_comment_length_]      = 0;
 373     $self->[_last_side_comment_level_]       = -1;
 374     $self->[_outdented_line_count_]          = 0;
 375     $self->[_first_outdented_line_at_]       = 0;
 376     $self->[_last_outdented_line_at_]        = 0;
 377     $self->[_consecutive_block_comments_]    = 0;
 378
 379     bless $self, $class;
 380     return $self;
 381 } ## end sub new
 382
 383 #################################
 384 # CODE SECTION 2: Basic Utilities
 385 #################################
 386
 387 sub flush {
 388
 389     # flush() is the external call to completely empty the pipeline.
 390     my ($self) = @_;
 391
 392     # push things out the pipeline...
 393
 394     # push out any current group lines
 395     $self->_flush_group_lines();
 396
 397     # then anything left in the cache of step_B
 398     $self->_flush_step_B_cache();
 399
 400     # then anything left in the buffer of step_C
 401     $self->dump_valign_buffer();
 402
 403     return;
 404 } ## end sub flush
 405
 406 sub initialize_for_new_group {
 407     my ($self) = @_;
 408
 409     $self->[_rgroup_lines_]                = [];
 410     $self->[_group_type_]                  = EMPTY_STRING;
 411     $self->[_zero_count_]                  = 0;
 412     $self->[_comment_leading_space_count_] = 0;
 413     $self->[_last_leading_space_count_]    = 0;
 414     $self->[_group_maximum_line_length_]   = undef;
 415
 416     # Note that the value for _group_level_ is
 417     # handled separately in sub valign_input
 418     return;
 419 } ## end sub initialize_for_new_group
 420
 421 sub group_line_count {
 422     return +@{ $_[0]->[_rgroup_lines_] };
 423 }
 424
 425 # interface to Perl::Tidy::Diagnostics routines
 426 # For debugging; not currently used
 427 sub write_diagnostics {
 428     my ( $self, $msg ) = @_;
 429     my $diagnostics_object = $self->[_diagnostics_object_];
 430     if ($diagnostics_object) {
 431         $diagnostics_object->write_diagnostics($msg);
 432     }
 433     return;
 434 } ## end sub write_diagnostics
 435
 436 {    ## begin closure for logger routines
 437     my $logger_object;
 438
 439     # Called once per file to initialize the logger object
 440     sub set_logger_object {
 441         $logger_object = shift;
 442         return;
 443     }
 444
 445     sub get_logger_object {
 446         return $logger_object;
 447     }
 448
 449     sub get_input_stream_name {
 450         my $input_stream_name = EMPTY_STRING;
 451         if ($logger_object) {
 452             $input_stream_name = $logger_object->get_input_stream_name();
 453         }
 454         return $input_stream_name;
 455     } ## end sub get_input_stream_name
 456
 457     sub warning {
 458         my ($msg) = @_;
 459         if ($logger_object) {
 460             $logger_object->warning($msg);
 461         }
 462         return;
 463     } ## end sub warning
 464
 465     sub write_logfile_entry {
 466         my ($msg) = @_;
 467         if ($logger_object) {
 468             $logger_object->write_logfile_entry($msg);
 469         }
 470         return;
 471     } ## end sub write_logfile_entry
 472 }
 473
 474 sub get_cached_line_count {
 475     my $self = shift;
 476     return $self->group_line_count() + ( get_cached_line_type() ? 1 : 0 );
 477 }
 478
 479 sub get_recoverable_spaces {
 480
 481     # return the number of spaces (+ means shift right, - means shift left)
 482     # that we would like to shift a group of lines with the same indentation
 483     # to get them to line up with their opening parens
 484     my $indentation = shift;
 485     return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
 486 } ## end sub get_recoverable_spaces
 487
 488 ######################################################
 489 # CODE SECTION 3: Code to accept input and form groups
 490 ######################################################
 491
 492 use constant DEBUG_VALIGN      => 0;
 493 use constant SC_LONG_LINE_DIFF => 12;
 494
 495 my %is_closing_token;
 496
 497 BEGIN {
 498     my @q = qw< } ) ] >;
 499     @is_closing_token{@q} = (1) x scalar(@q);
 500 }
 501
 502 #--------------------------------------------
 503 # VTFLAGS: Vertical tightness types and flags
 504 #--------------------------------------------
 505 # Vertical tightness is controlled by a 'type' and associated 'flags' for each
 506 # line.  These values are set by sub Formatter::set_vertical_tightness_flags.
 507 # These are defined as follows:
 508
 509 # Vertical Tightness Line Type Codes:
 510 # Type 0, no vertical tightness condition
 511 # Type 1, last token of this line is a non-block opening token
 512 # Type 2, first token of next line is a non-block closing
 513 # Type 3, isolated opening block brace
 514 # type 4, isolated closing block brace
 515
 516 # Opening token flag values are the vertical tightness flags
 517 # 0 do not join with next line
 518 # 1 just one join per line
 519 # 2 any number of joins
 520
 521 # Closing token flag values indicate spacing:
 522 # 0 = no space added before closing token
 523 # 1 = single space added before closing token
 524
 525 sub valign_input {
 526
 527     #---------------------------------------------------------------------
 528     # This is the front door of the vertical aligner.  On each call
 529     # we receive one line of specially marked text for vertical alignment.
 530     # We compare the line with the current group, and either:
 531     # - the line joins the current group if alignments match, or
 532     # - the current group is flushed and a new group is started otherwise
 533     #---------------------------------------------------------------------
 534     #
 535     # The key input parameters describing each line are:
 536     #     $level          = indentation level of this line
 537     #     $rfields        = ref to array of fields
 538     #     $rpatterns      = ref to array of patterns, one per field
 539     #     $rtokens        = ref to array of tokens starting fields 1,2,..
 540     #     $rfield_lengths = ref to array of field display widths
 541     #
 542     # Here is an example of what this package does.  In this example,
 543     # we are trying to line up both the '=>' and the '#'.
 544     #
 545     #         '18' => 'grave',    #   \`
 546     #         '19' => 'acute',    #   `'
 547     #         '20' => 'caron',    #   \v
 548     # <-tabs-><f1-><--field 2 ---><-f3->
 549     # |            |              |    |
 550     # |            |              |    |
 551     # col1        col2         col3 col4
 552     #
 553     # The calling routine has already broken the entire line into 3 fields as
 554     # indicated.  (So the work of identifying promising common tokens has
 555     # already been done).
 556     #
 557     # In this example, there will be 2 tokens being matched: '=>' and '#'.
 558     # They are the leading parts of fields 2 and 3, but we do need to know
 559     # what they are so that we can dump a group of lines when these tokens
 560     # change.
 561     #
 562     # The fields contain the actual characters of each field.  The patterns
 563     # are like the fields, but they contain mainly token types instead
 564     # of tokens, so they have fewer characters.  They are used to be
 565     # sure we are matching fields of similar type.
 566     #
 567     # In this example, there will be 4 column indexes being adjusted.  The
 568     # first one is always at zero.  The interior columns are at the start of
 569     # the matching tokens, and the last one tracks the maximum line length.
 570     #
 571     # Each time a new line comes in, it joins the current vertical
 572     # group if possible.  Otherwise it causes the current group to be flushed
 573     # and a new group is started.
 574     #
 575     # For each new group member, the column locations are increased, as
 576     # necessary, to make room for the new fields.  When the group is finally
 577     # output, these column numbers are used to compute the amount of spaces of
 578     # padding needed for each field.
 579     #
 580     # Programming note: the fields are assumed not to have any tab characters.
 581     # Tabs have been previously removed except for tabs in quoted strings and
 582     # side comments.  Tabs in these fields can mess up the column counting.
 583     # The log file warns the user if there are any such tabs.
 584
 585     my ( $self, $rcall_hash ) = @_;
 586
 587     # Unpack the call args. This form is significantly faster than getting them
 588     # one-by-one.
 589     my (
 590
 591         $Kend,
 592         $break_alignment_after,
 593         $break_alignment_before,
 594         $ci_level,
 595         $forget_side_comment,
 596         $indentation,
 597         $is_terminal_ternary,
 598         $level,
 599         $level_end,
 600         $list_seqno,
 601         $maximum_line_length,
 602         $outdent_long_lines,
 603         $rline_alignment,
 604         $rvertical_tightness_flags,
 605
 606       ) =
 607
 608       @{$rcall_hash}{
 609         qw(
 610           Kend
 611           break_alignment_after
 612           break_alignment_before
 613           ci_level
 614           forget_side_comment
 615           indentation
 616           is_terminal_ternary
 617           level
 618           level_end
 619           list_seqno
 620           maximum_line_length
 621           outdent_long_lines
 622           rline_alignment
 623           rvertical_tightness_flags
 624         )
 625       };
 626
 627     my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
 628       @{$rline_alignment};
 629
 630     # The index '$Kend' is a value which passed along with the line text to sub
 631     # 'write_code_line' for a convergence check.
 632
 633     # number of fields is $jmax
 634     # number of tokens between fields is $jmax-1
 635     my $jmax = @{$rfields} - 1;
 636
 637     my $leading_space_count =
 638       ref($indentation) ? $indentation->get_spaces() : $indentation;
 639
 640     # set outdented flag to be sure we either align within statements or
 641     # across statement boundaries, but not both.
 642     my $is_outdented =
 643       $self->[_last_leading_space_count_] > $leading_space_count;
 644     $self->[_last_leading_space_count_] = $leading_space_count;
 645
 646     # Identify a hanging side comment.  Hanging side comments have an empty
 647     # initial field.
 648     my $is_hanging_side_comment =
 649       ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
 650
 651     # Undo outdented flag for a hanging side comment
 652     $is_outdented = 0 if $is_hanging_side_comment;
 653
 654     # Identify a block comment.
 655     my $is_block_comment = $jmax == 0 && substr( $rfields->[0], 0, 1 ) eq '#';
 656
 657     # Block comment .. update count
 658     if ($is_block_comment) {
 659         $self->[_consecutive_block_comments_]++;
 660     }
 661
 662     # Not a block comment ..
 663     # Forget side comment column if we saw 2 or more block comments,
 664     # and reset the count
 665     else {
 666
 667         if ( $self->[_consecutive_block_comments_] > 1 ) {
 668             $self->forget_side_comment();
 669         }
 670         $self->[_consecutive_block_comments_] = 0;
 671     }
 672
 673     # Reset side comment location if we are entering a new block from level 0.
 674     # This is intended to keep them from drifting too far to the right.
 675     if ($forget_side_comment) {
 676         $self->forget_side_comment();
 677     }
 678
 679     my $is_balanced_line = $level_end == $level;
 680
 681     my $group_level               = $self->[_group_level_];
 682     my $group_maximum_line_length = $self->[_group_maximum_line_length_];
 683
 684     DEBUG_VALIGN && do {
 685         my $nlines = $self->group_line_count();
 686         print STDOUT
 687 "Entering valign_input: lines=$nlines new #fields= $jmax, leading_count=$leading_space_count, level=$level, group_level=$group_level, level_end=$level_end\n";
 688     };
 689
 690     # Validate cached line if necessary: If we can produce a container
 691     # with just 2 lines total by combining an existing cached opening
 692     # token with the closing token to follow, then we will mark both
 693     # cached flags as valid.
 694     my $cached_line_type = get_cached_line_type();
 695     if ($cached_line_type) {
 696         my $cached_line_opening_flag = get_cached_line_opening_flag();
 697         if ($rvertical_tightness_flags) {
 698             my $cached_seqno = get_cached_seqno();
 699             if (   $cached_seqno
 700                 && $rvertical_tightness_flags->{_vt_seqno}
 701                 && $rvertical_tightness_flags->{_vt_seqno} == $cached_seqno )
 702             {
 703
 704                 # Fix for b1187 and b1188: Normally this step is only done
 705                 # if the number of existing lines is 0 or 1.  But to prevent
 706                 # blinking, this range can be controlled by the caller.
 707                 # If zero values are given we fall back on the range 0 to 1.
 708                 my $line_count = $self->group_line_count();
 709                 my $min_lines  = $rvertical_tightness_flags->{_vt_min_lines};
 710                 my $max_lines  = $rvertical_tightness_flags->{_vt_max_lines};
 711                 $min_lines = 0 unless ($min_lines);
 712                 $max_lines = 1 unless ($max_lines);
 713                 if (   ( $line_count >= $min_lines )
 714                     && ( $line_count <= $max_lines ) )
 715                 {
 716                     $rvertical_tightness_flags->{_vt_valid_flag} ||= 1;
 717                     set_cached_line_valid(1);
 718                 }
 719             }
 720         }
 721
 722         # do not join an opening block brace (type 3, see VTFLAGS)
 723         # with an unbalanced line unless requested with a flag value of 2
 724         if (   $cached_line_type == 3
 725             && !$self->group_line_count()
 726             && $cached_line_opening_flag < 2
 727             && !$is_balanced_line )
 728         {
 729             set_cached_line_valid(0);
 730         }
 731     }
 732
 733     # shouldn't happen:
 734     if ( $level < 0 ) { $level = 0 }
 735
 736     # do not align code across indentation level changes
 737     # or changes in the maximum line length
 738     # or if vertical alignment is turned off
 739     if (
 740         $level != $group_level
 741         || (   $group_maximum_line_length
 742             && $maximum_line_length != $group_maximum_line_length )
 743         || $is_outdented
 744         || ( $is_block_comment && !$self->[_rOpts_valign_block_comments_] )
 745         || (   !$is_block_comment
 746             && !$self->[_rOpts_valign_side_comments_]
 747             && !$self->[_rOpts_valign_code_] )
 748       )
 749     {
 750
 751         $self->_flush_group_lines( $level - $group_level );
 752
 753         $group_level                         = $level;
 754         $self->[_group_level_]               = $group_level;
 755         $self->[_group_maximum_line_length_] = $maximum_line_length;
 756
 757         # Update leading spaces after the above flush because the leading space
 758         # count may have been changed if the -icp flag is in effect
 759         $leading_space_count =
 760           ref($indentation) ? $indentation->get_spaces() : $indentation;
 761     }
 762
 763     # --------------------------------------------------------------------
 764     # Collect outdentable block COMMENTS
 765     # --------------------------------------------------------------------
 766     if ( $self->[_group_type_] eq 'COMMENT' ) {
 767         if (   $is_block_comment
 768             && $outdent_long_lines
 769             && $leading_space_count == $self->[_comment_leading_space_count_] )
 770         {
 771
 772             # Note that for a comment group we are not storing a line
 773             # but rather just the text and its length.
 774             push @{ $self->[_rgroup_lines_] },
 775               [ $rfields->[0], $rfield_lengths->[0], $Kend ];
 776             return;
 777         }
 778         else {
 779             $self->_flush_group_lines();
 780         }
 781     }
 782
 783     my $rgroup_lines = $self->[_rgroup_lines_];
 784     if ( $break_alignment_before && @{$rgroup_lines} ) {
 785         $rgroup_lines->[-1]->{'end_group'} = 1;
 786     }
 787
 788     # --------------------------------------------------------------------
 789     # add dummy fields for terminal ternary
 790     # --------------------------------------------------------------------
 791     my $j_terminal_match;
 792
 793     if ( $is_terminal_ternary && @{$rgroup_lines} ) {
 794         $j_terminal_match =
 795           fix_terminal_ternary( $rgroup_lines->[-1], $rfields, $rtokens,
 796             $rpatterns, $rfield_lengths, $group_level, );
 797         $jmax = @{$rfields} - 1;
 798     }
 799
 800     # --------------------------------------------------------------------
 801     # add dummy fields for else statement
 802     # --------------------------------------------------------------------
 803
 804     # Note the trailing space after 'else' here. If there were no space between
 805     # the else and the next '{' then we would not be able to do vertical
 806     # alignment of the '{'.
 807     if (   $rfields->[0] eq 'else '
 808         && @{$rgroup_lines}
 809         && $is_balanced_line )
 810     {
 811
 812         $j_terminal_match =
 813           fix_terminal_else( $rgroup_lines->[-1], $rfields, $rtokens,
 814             $rpatterns, $rfield_lengths );
 815         $jmax = @{$rfields} - 1;
 816     }
 817
 818     # --------------------------------------------------------------------
 819     # Handle simple line of code with no fields to match.
 820     # --------------------------------------------------------------------
 821     if ( $jmax <= 0 ) {
 822         $self->[_zero_count_]++;
 823
 824         if ( @{$rgroup_lines}
 825             && !get_recoverable_spaces( $rgroup_lines->[0]->{'indentation'} ) )
 826         {
 827
 828             # flush the current group if it has some aligned columns..
 829             # or we haven't seen a comment lately
 830             if (   $rgroup_lines->[0]->{'jmax'} > 1
 831                 || $self->[_zero_count_] > 3 )
 832             {
 833                 $self->_flush_group_lines();
 834
 835                 # Update '$rgroup_lines' - it will become a ref to empty array.
 836                 # This allows avoiding a call to get_group_line_count below.
 837                 $rgroup_lines = $self->[_rgroup_lines_];
 838             }
 839         }
 840
 841         # start new COMMENT group if this comment may be outdented
 842         if (   $is_block_comment
 843             && $outdent_long_lines
 844             && !@{$rgroup_lines} )
 845         {
 846             $self->[_group_type_]                  = 'COMMENT';
 847             $self->[_comment_leading_space_count_] = $leading_space_count;
 848             $self->[_group_maximum_line_length_]   = $maximum_line_length;
 849             push @{$rgroup_lines},
 850               [ $rfields->[0], $rfield_lengths->[0], $Kend ];
 851             return;
 852         }
 853
 854         # just write this line directly if no current group, no side comment,
 855         # and no space recovery is needed.
 856         if (   !@{$rgroup_lines}
 857             && !get_recoverable_spaces($indentation) )
 858         {
 859
 860             $self->valign_output_step_B(
 861                 {
 862                     leading_space_count       => $leading_space_count,
 863                     line                      => $rfields->[0],
 864                     line_length               => $rfield_lengths->[0],
 865                     side_comment_length       => 0,
 866                     outdent_long_lines        => $outdent_long_lines,
 867                     rvertical_tightness_flags => $rvertical_tightness_flags,
 868                     level                     => $level,
 869                     level_end                 => $level_end,
 870                     Kend                      => $Kend,
 871                     maximum_line_length       => $maximum_line_length,
 872                 }
 873             );
 874             return;
 875         }
 876     }
 877     else {
 878         $self->[_zero_count_] = 0;
 879     }
 880
 881     # --------------------------------------------------------------------
 882     # It simplifies things to create a zero length side comment
 883     # if none exists.
 884     # --------------------------------------------------------------------
 885     if ( ( $jmax == 0 ) || ( $rtokens->[ $jmax - 1 ] ne '#' ) ) {
 886         $jmax += 1;
 887         $rtokens->[ $jmax - 1 ]  = '#';
 888         $rfields->[$jmax]        = EMPTY_STRING;
 889         $rfield_lengths->[$jmax] = 0;
 890         $rpatterns->[$jmax]      = '#';
 891     }
 892
 893     # --------------------------------------------------------------------
 894     # create an object to hold this line
 895     # --------------------------------------------------------------------
 896
 897     # The hash keys below must match the list of keys in %valid_LINE_keys.
 898     # Values in this hash are accessed directly, except for 'ralignments',
 899     # rather than with get/set calls for efficiency.
 900     my $new_line = Perl::Tidy::VerticalAligner::Line->new(
 901         {
 902             jmax                      => $jmax,
 903             rtokens                   => $rtokens,
 904             rfields                   => $rfields,
 905             rpatterns                 => $rpatterns,
 906             rfield_lengths            => $rfield_lengths,
 907             indentation               => $indentation,
 908             leading_space_count       => $leading_space_count,
 909             outdent_long_lines        => $outdent_long_lines,
 910             list_seqno                => $list_seqno,
 911             list_type                 => EMPTY_STRING,
 912             is_hanging_side_comment   => $is_hanging_side_comment,
 913             rvertical_tightness_flags => $rvertical_tightness_flags,
 914             is_terminal_ternary       => $is_terminal_ternary,
 915             j_terminal_match          => $j_terminal_match,
 916             end_group                 => $break_alignment_after,
 917             Kend                      => $Kend,
 918             ci_level                  => $ci_level,
 919             level                     => $level,
 920             level_end                 => $level_end,
 921             imax_pair                 => -1,
 922             maximum_line_length       => $maximum_line_length,
 923
 924             ralignments => [],
 925         }
 926     );
 927
 928     DEVEL_MODE
 929       && check_keys( $new_line, \%valid_LINE_keys,
 930         "Checking line keys at line definition", 1 );
 931
 932     # --------------------------------------------------------------------
 933     # Decide if this is a simple list of items.
 934     # We use this to be less restrictive in deciding what to align.
 935     # --------------------------------------------------------------------
 936     decide_if_list($new_line) if ($list_seqno);
 937
 938     # --------------------------------------------------------------------
 939     # Append this line to the current group (or start new group)
 940     # --------------------------------------------------------------------
 941
 942     push @{ $self->[_rgroup_lines_] }, $new_line;
 943     $self->[_group_maximum_line_length_] = $maximum_line_length;
 944
 945     # output this group if it ends in a terminal else or ternary line
 946     if ( defined($j_terminal_match) ) {
 947         $self->_flush_group_lines();
 948     }
 949
 950     # Force break after jump to lower level
 951     elsif ($level_end < $level
 952         || $is_closing_token{ substr( $rfields->[0], 0, 1 ) } )
 953     {
 954         $self->_flush_group_lines(-1);
 955     }
 956
 957     # --------------------------------------------------------------------
 958     # Some old debugging stuff
 959     # --------------------------------------------------------------------
 960     DEBUG_VALIGN && do {
 961         print STDOUT "exiting valign_input fields:";
 962         dump_array( @{$rfields} );
 963         print STDOUT "exiting valign_input tokens:";
 964         dump_array( @{$rtokens} );
 965         print STDOUT "exiting valign_input patterns:";
 966         dump_array( @{$rpatterns} );
 967     };
 968
 969     return;
 970 } ## end sub valign_input
 971
 972 sub join_hanging_comment {
 973
 974     # Add dummy fields to a hanging side comment to make it look
 975     # like the first line in its potential group.  This simplifies
 976     # the coding.
 977     my ( $new_line, $old_line ) = @_;
 978
 979     my $jmax = $new_line->{'jmax'};
 980
 981     # must be 2 fields
 982     return 0 unless $jmax == 1;
 983     my $rtokens = $new_line->{'rtokens'};
 984
 985     # the second field must be a comment
 986     return 0 unless $rtokens->[0] eq '#';
 987     my $rfields = $new_line->{'rfields'};
 988
 989     # the first field must be empty
 990     return 0 unless $rfields->[0] =~ /^\s*$/;
 991
 992     # the current line must have fewer fields
 993     my $maximum_field_index = $old_line->{'jmax'};
 994     return 0
 995       unless $maximum_field_index > $jmax;
 996
 997     # looks ok..
 998     my $rpatterns      = $new_line->{'rpatterns'};
 999     my $rfield_lengths = $new_line->{'rfield_lengths'};
1000
1001     $new_line->{'is_hanging_side_comment'} = 1;
1002
1003     $jmax                     = $maximum_field_index;
1004     $new_line->{'jmax'}       = $jmax;
1005     $rfields->[$jmax]         = $rfields->[1];
1006     $rfield_lengths->[$jmax]  = $rfield_lengths->[1];
1007     $rtokens->[ $jmax - 1 ]   = $rtokens->[0];
1008     $rpatterns->[ $jmax - 1 ] = $rpatterns->[0];
1009
1010     foreach my $j ( 1 .. $jmax - 1 ) {
1011         $rfields->[$j]         = EMPTY_STRING;
1012         $rfield_lengths->[$j]  = 0;
1013         $rtokens->[ $j - 1 ]   = EMPTY_STRING;
1014         $rpatterns->[ $j - 1 ] = EMPTY_STRING;
1015     }
1016     return 1;
1017 } ## end sub join_hanging_comment
1018
1019 {    ## closure for sub decide_if_list
1020
1021     my %is_comma_token;
1022
1023     BEGIN {
1024
1025         my @q = qw( => );
1026         push @q, ',';
1027         @is_comma_token{@q} = (1) x scalar(@q);
1028     } ## end BEGIN
1029
1030     sub decide_if_list {
1031
1032         my $line = shift;
1033
1034         # A list will be taken to be a line with a forced break in which all
1035         # of the field separators are commas or comma-arrows (except for the
1036         # trailing #)
1037
1038         my $rtokens    = $line->{'rtokens'};
1039         my $test_token = $rtokens->[0];
1040         my ( $raw_tok, $lev, $tag, $tok_count ) =
1041           decode_alignment_token($test_token);
1042         if ( $is_comma_token{$raw_tok} ) {
1043             my $list_type = $test_token;
1044             my $jmax      = $line->{'jmax'};
1045
1046             foreach ( 1 .. $jmax - 2 ) {
1047                 ( $raw_tok, $lev, $tag, $tok_count ) =
1048                   decode_alignment_token( $rtokens->[$_] );
1049                 if ( !$is_comma_token{$raw_tok} ) {
1050                     $list_type = EMPTY_STRING;
1051                     last;
1052                 }
1053             }
1054             $line->{'list_type'} = $list_type;
1055         }
1056         return;
1057     } ## end sub decide_if_list
1058 }
1059
1060 sub fix_terminal_ternary {
1061
1062     # Add empty fields as necessary to align a ternary term
1063     # like this:
1064     #
1065     #  my $leapyear =
1066     #      $year % 4   ? 0
1067     #    : $year % 100 ? 1
1068     #    : $year % 400 ? 0
1069     #    :               1;
1070     #
1071     # returns the index of the terminal question token, if any
1072
1073     my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths,
1074         $group_level )
1075       = @_;
1076
1077     return unless ($old_line);
1078     use constant EXPLAIN_TERNARY => 0;
1079
1080     if (%valign_control_hash) {
1081         my $align_ok = $valign_control_hash{'?'};
1082         $align_ok = $valign_control_default unless defined($align_ok);
1083         return unless ($align_ok);
1084     }
1085
1086     my $jmax        = @{$rfields} - 1;
1087     my $rfields_old = $old_line->{'rfields'};
1088
1089     my $rpatterns_old       = $old_line->{'rpatterns'};
1090     my $rtokens_old         = $old_line->{'rtokens'};
1091     my $maximum_field_index = $old_line->{'jmax'};
1092
1093     # look for the question mark after the :
1094     my ($jquestion);
1095     my $depth_question;
1096     my $pad        = EMPTY_STRING;
1097     my $pad_length = 0;
1098     foreach my $j ( 0 .. $maximum_field_index - 1 ) {
1099         my $tok = $rtokens_old->[$j];
1100         my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
1101         if ( $raw_tok eq '?' ) {
1102             $depth_question = $lev;
1103
1104             # depth must be correct
1105             next unless ( $depth_question eq $group_level );
1106
1107             $jquestion = $j;
1108             if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
1109                 $pad_length = length($1);
1110                 $pad        = SPACE x $pad_length;
1111             }
1112             else {
1113                 return;    # shouldn't happen
1114             }
1115             last;
1116         }
1117     }
1118     return unless ( defined($jquestion) );    # shouldn't happen
1119
1120     # Now splice the tokens and patterns of the previous line
1121     # into the else line to insure a match.  Add empty fields
1122     # as necessary.
1123     my $jadd = $jquestion;
1124
1125     # Work on copies of the actual arrays in case we have
1126     # to return due to an error
1127     my @fields        = @{$rfields};
1128     my @patterns      = @{$rpatterns};
1129     my @tokens        = @{$rtokens};
1130     my @field_lengths = @{$rfield_lengths};
1131
1132     EXPLAIN_TERNARY && do {
1133         local $LIST_SEPARATOR = '><';
1134         print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n";
1135         print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n";
1136         print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
1137         print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n";
1138         print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n";
1139         print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
1140     };
1141
1142     # handle cases of leading colon on this line
1143     if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
1144
1145         my ( $colon, $therest ) = ( $1, $2 );
1146
1147         # Handle sub-case of first field with leading colon plus additional code
1148         # This is the usual situation as at the '1' below:
1149         #  ...
1150         #  : $year % 400 ? 0
1151         #  :               1;
1152         if ($therest) {
1153
1154             # Split the first field after the leading colon and insert padding.
1155             # Note that this padding will remain even if the terminal value goes
1156             # out on a separate line.  This does not seem to look to bad, so no
1157             # mechanism has been included to undo it.
1158             my $field1        = shift @fields;
1159             my $field_length1 = shift @field_lengths;
1160             my $len_colon     = length($colon);
1161             unshift @fields, ( $colon, $pad . $therest );
1162             unshift @field_lengths,
1163               ( $len_colon, $pad_length + $field_length1 - $len_colon );
1164
1165             # change the leading pattern from : to ?
1166             return unless ( $patterns[0] =~ s/^\:/?/ );
1167
1168             # install leading tokens and patterns of existing line
1169             unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
1170             unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
1171
1172             # insert appropriate number of empty fields
1173             splice( @fields,        1, 0, (EMPTY_STRING) x $jadd ) if $jadd;
1174             splice( @field_lengths, 1, 0, (0) x $jadd )            if $jadd;
1175         }
1176
1177         # handle sub-case of first field just equal to leading colon.
1178         # This can happen for example in the example below where
1179         # the leading '(' would create a new alignment token
1180         # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
1181         # :                        ( $mname = $name . '->' );
1182         else {
1183
1184             return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
1185
1186             # prepend a leading ? onto the second pattern
1187             $patterns[1] = "?b" . $patterns[1];
1188
1189             # pad the second field
1190             $fields[1]        = $pad . $fields[1];
1191             $field_lengths[1] = $pad_length + $field_lengths[1];
1192
1193             # install leading tokens and patterns of existing line, replacing
1194             # leading token and inserting appropriate number of empty fields
1195             splice( @tokens,   0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
1196             splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
1197             splice( @fields,        1, 0, (EMPTY_STRING) x $jadd ) if $jadd;
1198             splice( @field_lengths, 1, 0, (0) x $jadd )            if $jadd;
1199         }
1200     }
1201
1202     # Handle case of no leading colon on this line.  This will
1203     # be the case when -wba=':' is used.  For example,
1204     #  $year % 400 ? 0 :
1205     #                1;
1206     else {
1207
1208         # install leading tokens and patterns of existing line
1209         $patterns[0] = '?' . 'b' . $patterns[0];
1210         unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
1211         unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
1212
1213         # insert appropriate number of empty fields
1214         $jadd             = $jquestion + 1;
1215         $fields[0]        = $pad . $fields[0];
1216         $field_lengths[0] = $pad_length + $field_lengths[0];
1217         splice( @fields,        0, 0, (EMPTY_STRING) x $jadd ) if $jadd;
1218         splice( @field_lengths, 0, 0, (0) x $jadd )            if $jadd;
1219     }
1220
1221     EXPLAIN_TERNARY && do {
1222         local $LIST_SEPARATOR = '><';
1223         print STDOUT "MODIFIED TOKENS=<@tokens>\n";
1224         print STDOUT "MODIFIED PATTERNS=<@patterns>\n";
1225         print STDOUT "MODIFIED FIELDS=<@fields>\n";
1226     };
1227
1228     # all ok .. update the arrays
1229     @{$rfields}        = @fields;
1230     @{$rtokens}        = @tokens;
1231     @{$rpatterns}      = @patterns;
1232     @{$rfield_lengths} = @field_lengths;
1233
1234     # force a flush after this line
1235     return $jquestion;
1236 } ## end sub fix_terminal_ternary
1237
1238 sub fix_terminal_else {
1239
1240     # Add empty fields as necessary to align a balanced terminal
1241     # else block to a previous if/elsif/unless block,
1242     # like this:
1243     #
1244     #  if   ( 1 || $x ) { print "ok 13\n"; }
1245     #  else             { print "not ok 13\n"; }
1246     #
1247     # returns a positive value if the else block should be indented
1248     #
1249     my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths ) = @_;
1250
1251     return unless ($old_line);
1252     my $jmax = @{$rfields} - 1;
1253     return unless ( $jmax > 0 );
1254
1255     if (%valign_control_hash) {
1256         my $align_ok = $valign_control_hash{'{'};
1257         $align_ok = $valign_control_default unless defined($align_ok);
1258         return unless ($align_ok);
1259     }
1260
1261     # check for balanced else block following if/elsif/unless
1262     my $rfields_old = $old_line->{'rfields'};
1263
1264     # TBD: add handling for 'case'
1265     return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
1266
1267     # look for the opening brace after the else, and extract the depth
1268     my $tok_brace = $rtokens->[0];
1269     my $depth_brace;
1270     if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
1271
1272     # probably:  "else # side_comment"
1273     else { return }
1274
1275     my $rpatterns_old       = $old_line->{'rpatterns'};
1276     my $rtokens_old         = $old_line->{'rtokens'};
1277     my $maximum_field_index = $old_line->{'jmax'};
1278
1279     # be sure the previous if/elsif is followed by an opening paren
1280     my $jparen    = 0;
1281     my $tok_paren = '(' . $depth_brace;
1282     my $tok_test  = $rtokens_old->[$jparen];
1283     return unless ( $tok_test eq $tok_paren );    # shouldn't happen
1284
1285     # Now find the opening block brace
1286     my ($jbrace);
1287     foreach my $j ( 1 .. $maximum_field_index - 1 ) {
1288         my $tok = $rtokens_old->[$j];
1289         if ( $tok eq $tok_brace ) {
1290             $jbrace = $j;
1291             last;
1292         }
1293     }
1294     return unless ( defined($jbrace) );           # shouldn't happen
1295
1296     # Now splice the tokens and patterns of the previous line
1297     # into the else line to insure a match.  Add empty fields
1298     # as necessary.
1299     my $jadd = $jbrace - $jparen;
1300     splice( @{$rtokens},   0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
1301     splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
1302     splice( @{$rfields},        1, 0, (EMPTY_STRING) x $jadd );
1303     splice( @{$rfield_lengths}, 1, 0, (0) x $jadd );
1304
1305     # force a flush after this line if it does not follow a case
1306     if   ( $rfields_old->[0] =~ /^case\s*$/ ) { return }
1307     else                                      { return $jbrace }
1308 } ## end sub fix_terminal_else
1309
1310 my %is_closing_block_type;
1311
1312 BEGIN {
1313     my @q = qw< } ] >;
1314     @is_closing_block_type{@q} = (1) x scalar(@q);
1315 }
1316
1317 # This is a flag for testing alignment by sub sweep_left_to_right only.
1318 # This test can help find problems with the alignment logic.
1319 # This flag should normally be zero.
1320 use constant TEST_SWEEP_ONLY => 0;
1321
1322 use constant EXPLAIN_CHECK_MATCH => 0;
1323
1324 sub check_match {
1325
1326     # See if the current line matches the current vertical alignment group.
1327
1328     my ( $self, $new_line, $base_line, $prev_line ) = @_;
1329
1330     # Given:
1331     #  $new_line  = the line being considered for group inclusion
1332     #  $base_line = the first line of the current group
1333     #  $prev_line = the line just before $new_line
1334
1335     # returns a flag and a value as follows:
1336     #    return (0, $imax_align)   if the line does not match
1337     #    return (1, $imax_align)   if the line matches but does not fit
1338     #    return (2, $imax_align)   if the line matches and fits
1339
1340     use constant NO_MATCH      => 0;
1341     use constant MATCH_NO_FIT  => 1;
1342     use constant MATCH_AND_FIT => 2;
1343
1344     my $return_value;
1345
1346     # Returns '$imax_align' which is the index of the maximum matching token.
1347     # It will be used in the subsequent left-to-right sweep to align as many
1348     # tokens as possible for lines which partially match.
1349     my $imax_align = -1;
1350
1351     # variable $GoToMsg explains reason for no match, for debugging
1352     my $GoToMsg = EMPTY_STRING;
1353
1354     my $jmax                = $new_line->{'jmax'};
1355     my $maximum_field_index = $base_line->{'jmax'};
1356
1357     my $jlimit = $jmax - 2;
1358     if ( $jmax > $maximum_field_index ) {
1359         $jlimit = $maximum_field_index - 2;
1360     }
1361
1362     if ( $new_line->{'is_hanging_side_comment'} ) {
1363
1364         # HSC's can join the group if they fit
1365     }
1366
1367     # Everything else
1368     else {
1369
1370         # A group with hanging side comments ends with the first non hanging
1371         # side comment.
1372         if ( $base_line->{'is_hanging_side_comment'} ) {
1373             $GoToMsg      = "end of hanging side comments";
1374             $return_value = NO_MATCH;
1375         }
1376         else {
1377
1378             # The number of tokens that this line shares with the previous
1379             # line has been stored with the previous line.  This value was
1380             # calculated and stored by sub 'match_line_pair'.
1381             $imax_align = $prev_line->{'imax_pair'};
1382
1383             if ( $imax_align != $jlimit ) {
1384                 $GoToMsg = "Not all tokens match: $imax_align != $jlimit\n";
1385                 $return_value = NO_MATCH;
1386             }
1387         }
1388     }
1389
1390     if ( !defined($return_value) ) {
1391
1392         # The tokens match, but the lines must have identical number of
1393         # tokens to join the group.
1394         if ( $maximum_field_index != $jmax ) {
1395             $GoToMsg      = "token count differs";
1396             $return_value = NO_MATCH;
1397         }
1398
1399         # The tokens match. Now See if there is space for this line in the
1400         # current group.
1401         elsif ( $self->check_fit( $new_line, $base_line ) && !TEST_SWEEP_ONLY )
1402         {
1403
1404             $GoToMsg = "match and fit, imax_align=$imax_align, jmax=$jmax\n";
1405             $return_value = MATCH_AND_FIT;
1406             $imax_align   = $jlimit;
1407         }
1408         else {
1409             $GoToMsg = "match but no fit, imax_align=$imax_align, jmax=$jmax\n";
1410             $return_value = MATCH_NO_FIT;
1411             $imax_align   = $jlimit;
1412         }
1413     }
1414
1415     EXPLAIN_CHECK_MATCH
1416       && print
1417 "returning $return_value because $GoToMsg, max match index =i $imax_align, jmax=$jmax\n";
1418
1419     return ( $return_value, $imax_align );
1420 } ## end sub check_match
1421
1422 sub check_fit {
1423
1424     my ( $self, $new_line, $old_line ) = @_;
1425
1426     # The new line has alignments identical to the current group. Now we have
1427     # to fit the new line into the group without causing a field to exceed the
1428     # line length limit.
1429     #   return true if successful
1430     #   return false if not successful
1431
1432     my $jmax                = $new_line->{'jmax'};
1433     my $leading_space_count = $new_line->{'leading_space_count'};
1434     my $rfield_lengths      = $new_line->{'rfield_lengths'};
1435     my $padding_available   = $old_line->get_available_space_on_right();
1436     my $jmax_old            = $old_line->{'jmax'};
1437
1438     # Safety check ... only lines with equal array sizes should arrive here
1439     # from sub check_match.  So if this error occurs, look at recent changes in
1440     # sub check_match.  It is only supposed to check the fit of lines with
1441     # identical numbers of alignment tokens.
1442     if ( $jmax_old ne $jmax ) {
1443
1444         warning(<<EOM);
1445 Program bug detected in Perl::Tidy::VerticalAligner sub check_fit
1446 unexpected difference in array lengths: $jmax != $jmax_old
1447 EOM
1448         return;
1449     }
1450
1451     # Save current columns in case this line does not fit.
1452     my @alignments = @{ $old_line->{'ralignments'} };
1453     foreach my $alignment (@alignments) {
1454         $alignment->save_column();
1455     }
1456
1457     # Loop over all alignments ...
1458     for my $j ( 0 .. $jmax ) {
1459
1460         my $pad = $rfield_lengths->[$j] - $old_line->current_field_width($j);
1461
1462         if ( $j == 0 ) {
1463             $pad += $leading_space_count;
1464         }
1465
1466         # Keep going if this field does not need any space.
1467         next if ( $pad < 0 );
1468
1469         # Revert to the starting state if does not fit
1470         if ( $pad > $padding_available ) {
1471
1472             #----------------------------------------------
1473             # Line does not fit -- revert to starting state
1474             #----------------------------------------------
1475             foreach my $alignment (@alignments) {
1476                 $alignment->restore_column();
1477             }
1478             return;
1479         }
1480
1481         # make room for this field
1482         $old_line->increase_field_width( $j, $pad );
1483         $padding_available -= $pad;
1484     }
1485
1486     #-------------------------------------
1487     # The line fits, the match is accepted
1488     #-------------------------------------
1489     return 1;
1490
1491 } ## end sub check_fit
1492
1493 sub install_new_alignments {
1494
1495     my ($new_line) = @_;
1496
1497     my $jmax           = $new_line->{'jmax'};
1498     my $rfield_lengths = $new_line->{'rfield_lengths'};
1499     my $col            = $new_line->{'leading_space_count'};
1500
1501     my @alignments;
1502     for my $j ( 0 .. $jmax ) {
1503         $col += $rfield_lengths->[$j];
1504
1505         # create initial alignments for the new group
1506         my $alignment =
1507           Perl::Tidy::VerticalAligner::Alignment->new( { column => $col } );
1508         push @alignments, $alignment;
1509     }
1510     $new_line->{'ralignments'} = \@alignments;
1511     return;
1512 } ## end sub install_new_alignments
1513
1514 sub copy_old_alignments {
1515     my ( $new_line, $old_line ) = @_;
1516     my @new_alignments = @{ $old_line->{'ralignments'} };
1517     $new_line->{'ralignments'} = \@new_alignments;
1518     return;
1519 } ## end sub copy_old_alignments
1520
1521 sub dump_array {
1522
1523     # debug routine to dump array contents
1524     local $LIST_SEPARATOR = ')(';
1525     print STDOUT "(@_)\n";
1526     return;
1527 } ## end sub dump_array
1528
1529 sub level_change {
1530
1531     # compute decrease in level when we remove $diff spaces from the
1532     # leading spaces
1533     my ( $self, $leading_space_count, $diff, $level ) = @_;
1534
1535     my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
1536     if ($rOpts_indent_columns) {
1537         my $olev =
1538           int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
1539         my $nlev = int( $leading_space_count / $rOpts_indent_columns );
1540         $level -= ( $olev - $nlev );
1541         if ( $level < 0 ) { $level = 0 }
1542     }
1543     return $level;
1544 } ## end sub level_change
1545
1546 ###############################################
1547 # CODE SECTION 4: Code to process comment lines
1548 ###############################################
1549
1550 sub _flush_comment_lines {
1551
1552     # Output a group consisting of COMMENT lines
1553
1554     my ($self) = @_;
1555     my $rgroup_lines = $self->[_rgroup_lines_];
1556     return unless ( @{$rgroup_lines} );
1557     my $group_level               = $self->[_group_level_];
1558     my $group_maximum_line_length = $self->[_group_maximum_line_length_];
1559     my $leading_space_count       = $self->[_comment_leading_space_count_];
1560 ##  my $leading_string =
1561 ##    $self->get_leading_string( $leading_space_count, $group_level );
1562
1563     # look for excessively long lines
1564     my $max_excess = 0;
1565     foreach my $item ( @{$rgroup_lines} ) {
1566         my ( $str, $str_len ) = @{$item};
1567         my $excess =
1568           $str_len + $leading_space_count - $group_maximum_line_length;
1569         if ( $excess > $max_excess ) {
1570             $max_excess = $excess;
1571         }
1572     }
1573
1574     # zero leading space count if any lines are too long
1575     if ( $max_excess > 0 ) {
1576         $leading_space_count -= $max_excess;
1577         if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
1578         my $file_writer_object = $self->[_file_writer_object_];
1579         my $last_outdented_line_at =
1580           $file_writer_object->get_output_line_number();
1581         my $nlines = @{$rgroup_lines};
1582         $self->[_last_outdented_line_at_] =
1583           $last_outdented_line_at + $nlines - 1;
1584         my $outdented_line_count = $self->[_outdented_line_count_];
1585         unless ($outdented_line_count) {
1586             $self->[_first_outdented_line_at_] = $last_outdented_line_at;
1587         }
1588         $outdented_line_count += $nlines;
1589         $self->[_outdented_line_count_] = $outdented_line_count;
1590     }
1591
1592     # write the lines
1593     my $outdent_long_lines = 0;
1594
1595     foreach my $item ( @{$rgroup_lines} ) {
1596         my ( $str, $str_len, $Kend ) = @{$item};
1597         $self->valign_output_step_B(
1598             {
1599                 leading_space_count       => $leading_space_count,
1600                 line                      => $str,
1601                 line_length               => $str_len,
1602                 side_comment_length       => 0,
1603                 outdent_long_lines        => $outdent_long_lines,
1604                 rvertical_tightness_flags => undef,
1605                 level                     => $group_level,
1606                 level_end                 => $group_level,
1607                 Kend                      => $Kend,
1608                 maximum_line_length       => $group_maximum_line_length,
1609             }
1610         );
1611     }
1612
1613     $self->initialize_for_new_group();
1614     return;
1615 } ## end sub _flush_comment_lines
1616
1617 ######################################################
1618 # CODE SECTION 5: Code to process groups of code lines
1619 ######################################################
1620
1621 sub _flush_group_lines {
1622
1623     # This is the vertical aligner internal flush, which leaves the cache
1624     # intact
1625     my ( $self, $level_jump ) = @_;
1626
1627     # $level_jump = $next_level-$group_level, if known
1628     #             = undef if not known
1629     # Note: only the sign of the jump is needed
1630
1631     my $rgroup_lines = $self->[_rgroup_lines_];
1632     return unless ( @{$rgroup_lines} );
1633     my $group_type  = $self->[_group_type_];
1634     my $group_level = $self->[_group_level_];
1635
1636     # Debug
1637     0 && do {
1638         my ( $a, $b, $c ) = caller();
1639         my $nlines = @{$rgroup_lines};
1640         print STDOUT
1641 "APPEND0: _flush_group_lines called from $a $b $c lines=$nlines, type=$group_type \n";
1642     };
1643
1644     #-------------------------------------------
1645     # Section 1: Handle a group of COMMENT lines
1646     #-------------------------------------------
1647     if ( $group_type eq 'COMMENT' ) {
1648         $self->_flush_comment_lines();
1649         return;
1650     }
1651
1652     #------------------------------------------------------------------------
1653     # Section 2: Handle line(s) of CODE.  Most of the actual work of vertical
1654     # aligning happens here in the following steps:
1655     #------------------------------------------------------------------------
1656
1657     # STEP 1: Remove most unmatched tokens. They block good alignments.
1658     my ( $max_lev_diff, $saw_side_comment ) =
1659       delete_unmatched_tokens( $rgroup_lines, $group_level );
1660
1661     # STEP 2: Sweep top to bottom, forming subgroups of lines with exactly
1662     # matching common alignments.  The indexes of these subgroups are in the
1663     # return variable.
1664     my $rgroups = $self->sweep_top_down( $rgroup_lines, $group_level );
1665
1666     # STEP 3: Sweep left to right through the lines, looking for leading
1667     # alignment tokens shared by groups.
1668     sweep_left_to_right( $rgroup_lines, $rgroups, $group_level )
1669       if ( @{$rgroups} > 1 );
1670
1671     # STEP 4: Move side comments to a common column if possible.
1672     if ($saw_side_comment) {
1673         $self->align_side_comments( $rgroup_lines, $rgroups );
1674     }
1675
1676     # STEP 5: For the -lp option, increase the indentation of lists
1677     # to the desired amount, but do not exceed the line length limit.
1678
1679     # We are allowed to shift a group of lines to the right if:
1680     #  (1) its level is greater than the level of the previous group, and
1681     #  (2) its level is greater than the level of the next line to be written.
1682
1683     my $extra_indent_ok;
1684     if ( $group_level > $self->[_last_level_written_] ) {
1685
1686         # Use the level jump to next line to come, if given
1687         if ( defined($level_jump) ) {
1688             $extra_indent_ok = $level_jump < 0;
1689         }
1690
1691         # Otherwise, assume the next line has the level of the end of last line.
1692         # This fixes case c008.
1693         else {
1694             my $level_end = $rgroup_lines->[-1]->{'level_end'};
1695             $extra_indent_ok = $group_level > $level_end;
1696         }
1697     }
1698
1699     my $extra_leading_spaces =
1700       $extra_indent_ok
1701       ? get_extra_leading_spaces( $rgroup_lines, $rgroups )
1702       : 0;
1703
1704     # STEP 6: Output the lines.
1705     # All lines in this group have the same leading spacing and maximum line
1706     # length
1707     my $group_leader_length       = $rgroup_lines->[0]->{'leading_space_count'};
1708     my $group_maximum_line_length = $rgroup_lines->[0]->{'maximum_line_length'};
1709
1710     foreach my $line ( @{$rgroup_lines} ) {
1711         $self->valign_output_step_A(
1712             {
1713                 line                 => $line,
1714                 min_ci_gap           => 0,
1715                 do_not_align         => 0,
1716                 group_leader_length  => $group_leader_length,
1717                 extra_leading_spaces => $extra_leading_spaces,
1718                 level                => $group_level,
1719                 maximum_line_length  => $group_maximum_line_length,
1720             }
1721         );
1722     }
1723
1724     # Let the formatter know that this object has been processed and any
1725     # recoverable spaces have been handled.  This is needed for setting the
1726     # closing paren location in -lp mode.
1727     my $object = $rgroup_lines->[0]->{'indentation'};
1728     if ( ref($object) ) { $object->set_recoverable_spaces(0) }
1729
1730     $self->initialize_for_new_group();
1731     return;
1732 } ## end sub _flush_group_lines
1733
1734 {    ## closure for sub sweep_top_down
1735
1736     my $rall_lines;         # all of the lines
1737     my $grp_level;          # level of all lines
1738     my $rgroups;            # describes the partition of lines we will make here
1739     my $group_line_count;   # number of lines in current partition
1740
1741     BEGIN { $rgroups = [] }
1742
1743     sub initialize_for_new_rgroup {
1744         $group_line_count = 0;
1745         return;
1746     }
1747
1748     sub add_to_rgroup {
1749
1750         my ($jend) = @_;
1751         my $rline = $rall_lines->[$jend];
1752
1753         my $jbeg = $jend;
1754         if ( $group_line_count == 0 ) {
1755             install_new_alignments($rline);
1756         }
1757         else {
1758             my $rvals = pop @{$rgroups};
1759             $jbeg = $rvals->[0];
1760             copy_old_alignments( $rline, $rall_lines->[$jbeg] );
1761         }
1762         push @{$rgroups}, [ $jbeg, $jend, undef ];
1763         $group_line_count++;
1764         return;
1765     } ## end sub add_to_rgroup
1766
1767     sub get_rgroup_jrange {
1768
1769         return unless @{$rgroups};
1770         return unless ( $group_line_count > 0 );
1771         my ( $jbeg, $jend ) = @{ $rgroups->[-1] };
1772         return ( $jbeg, $jend );
1773     } ## end sub get_rgroup_jrange
1774
1775     sub end_rgroup {
1776
1777         my ($imax_align) = @_;
1778         return unless @{$rgroups};
1779         return unless ( $group_line_count > 0 );
1780
1781         my ( $jbeg, $jend ) = @{ pop @{$rgroups} };
1782         push @{$rgroups}, [ $jbeg, $jend, $imax_align ];
1783
1784         # Undo some alignments of poor two-line combinations.
1785         # We had to wait until now to know the line count.
1786         if ( $jend - $jbeg == 1 ) {
1787             my $line_0 = $rall_lines->[$jbeg];
1788             my $line_1 = $rall_lines->[$jend];
1789
1790             my $imax_pair = $line_1->{'imax_pair'};
1791             if ( $imax_pair > $imax_align ) { $imax_align = $imax_pair }
1792
1793             ## flag for possible future use:
1794             ## my $is_isolated_pair = $imax_pair < 0
1795             ##  && ( $jbeg == 0
1796             ##    || $rall_lines->[ $jbeg - 1 ]->{'imax_pair'} < 0 );
1797
1798             my $imax_prev =
1799               $jbeg > 0 ? $rall_lines->[ $jbeg - 1 ]->{'imax_pair'} : -1;
1800
1801             my ( $is_marginal, $imax_align_fix ) =
1802               is_marginal_match( $line_0, $line_1, $grp_level, $imax_align,
1803                 $imax_prev );
1804             if ($is_marginal) {
1805                 combine_fields( $line_0, $line_1, $imax_align_fix );
1806             }
1807         }
1808
1809         initialize_for_new_rgroup();
1810         return;
1811     } ## end sub end_rgroup
1812
1813     sub block_penultimate_match {
1814
1815         # emergency reset to prevent sweep_left_to_right from trying to match a
1816         # failed terminal else match
1817         return unless @{$rgroups} > 1;
1818         $rgroups->[-2]->[2] = -1;
1819         return;
1820     } ## end sub block_penultimate_match
1821
1822     sub sweep_top_down {
1823         my ( $self, $rlines, $group_level ) = @_;
1824
1825         # Partition the set of lines into final alignment subgroups
1826         # and store the alignments with the lines.
1827
1828         # The alignment subgroups we are making here are groups of consecutive
1829         # lines which have (1) identical alignment tokens and (2) do not
1830         # exceed the allowable maximum line length.  A later sweep from
1831         # left-to-right ('sweep_lr') will handle additional alignments.
1832
1833         # transfer args to closure variables
1834         $rall_lines = $rlines;
1835         $grp_level  = $group_level;
1836         $rgroups    = [];
1837         initialize_for_new_rgroup();
1838         return unless @{$rlines};    # shouldn't happen
1839
1840         # Unset the _end_group flag for the last line if it it set because it
1841         # is not needed and can causes problems for -lp formatting
1842         $rall_lines->[-1]->{'end_group'} = 0;
1843
1844         # Loop over all lines ...
1845         my $jline = -1;
1846         foreach my $new_line ( @{$rall_lines} ) {
1847             $jline++;
1848
1849             # Start a new subgroup if necessary
1850             if ( !$group_line_count ) {
1851                 add_to_rgroup($jline);
1852                 if ( $new_line->{'end_group'} ) {
1853                     end_rgroup(-1);
1854                 }
1855                 next;
1856             }
1857
1858             my $j_terminal_match = $new_line->{'j_terminal_match'};
1859             my ( $jbeg, $jend ) = get_rgroup_jrange();
1860             if ( !defined($jbeg) ) {
1861
1862                 # safety check, shouldn't happen
1863                 warning(<<EOM);
1864 Program bug detected in Perl::Tidy::VerticalAligner sub sweep_top_down
1865 undefined index for group line count $group_line_count
1866 EOM
1867                 $jbeg = $jline;
1868             }
1869             my $base_line = $rall_lines->[$jbeg];
1870
1871             # Initialize a global flag saying if the last line of the group
1872             # should match end of group and also terminate the group.  There
1873             # should be no returns between here and where the flag is handled
1874             # at the bottom.
1875             my $col_matching_terminal = 0;
1876             if ( defined($j_terminal_match) ) {
1877
1878                 # remember the column of the terminal ? or { to match with
1879                 $col_matching_terminal =
1880                   $base_line->get_column($j_terminal_match);
1881
1882                 # Ignore an undefined value as a defensive step; shouldn't
1883                 # normally happen.
1884                 $col_matching_terminal = 0
1885                   unless defined($col_matching_terminal);
1886             }
1887
1888             # -------------------------------------------------------------
1889             # Allow hanging side comment to join current group, if any.  The
1890             # only advantage is to keep the other tokens in the same group. For
1891             # example, this would make the '=' align here:
1892             #  $ax         = 1;           # side comment
1893             #                             # hanging side comment
1894             #  $boondoggle = 5;           # side comment
1895             #  $beetle     = 5;           # side comment
1896
1897             # here is another example..
1898
1899             #  _rtoc_name_count   => {},                   # hash to track ..
1900             #  _rpackage_stack    => [],                   # stack to check ..
1901             #                                              # name changes
1902             #  _rlast_level       => \$last_level,         # brace indentation
1903             #
1904             #
1905             # If this were not desired, the next step could be skipped.
1906             # -------------------------------------------------------------
1907             if ( $new_line->{'is_hanging_side_comment'} ) {
1908                 join_hanging_comment( $new_line, $base_line );
1909             }
1910
1911             # If this line has no matching tokens, then flush out the lines
1912             # BEFORE this line unless both it and the previous line have side
1913             # comments.  This prevents this line from pushing side comments out
1914             # to the right.
1915             elsif ( $new_line->{'jmax'} == 1 ) {
1916
1917                 # There are no matching tokens, so now check side comments.
1918                 # Programming note: accessing arrays with index -1 is
1919                 # risky in Perl, but we have verified there is at least one
1920                 # line in the group and that there is at least one field.
1921                 my $prev_comment =
1922                   $rall_lines->[ $jline - 1 ]->{'rfields'}->[-1];
1923                 my $side_comment = $new_line->{'rfields'}->[-1];
1924                 end_rgroup(-1) unless ( $side_comment && $prev_comment );
1925             }
1926
1927             # See if the new line matches and fits the current group,
1928             # if it still exists. Flush the current group if not.
1929             my $match_code;
1930             if ($group_line_count) {
1931                 ( $match_code, my $imax_align ) =
1932                   $self->check_match( $new_line, $base_line,
1933                     $rall_lines->[ $jline - 1 ] );
1934                 if ( $match_code != 2 ) { end_rgroup($imax_align) }
1935             }
1936
1937             # Store the new line
1938             add_to_rgroup($jline);
1939
1940             if ( defined($j_terminal_match) ) {
1941
1942                 # Decide if we should fix a terminal match. We can either:
1943                 # 1. fix it and prevent the sweep_lr from changing it, or
1944                 # 2. leave it alone and let sweep_lr try to fix it.
1945
1946                 # The current logic is to fix it if:
1947                 # -it has not joined to previous lines,
1948                 # -and either the previous subgroup has just 1 line, or
1949                 # -this line matched but did not fit (so sweep won't work)
1950                 my $fixit;
1951                 if ( $group_line_count == 1 ) {
1952                     $fixit ||= $match_code;
1953                     if ( !$fixit ) {
1954                         if ( @{$rgroups} > 1 ) {
1955                             my ( $jbegx, $jendx ) = @{ $rgroups->[-2] };
1956                             my $nlines = $jendx - $jbegx + 1;
1957                             $fixit ||= $nlines <= 1;
1958                         }
1959                     }
1960                 }
1961
1962                 if ($fixit) {
1963                     $base_line = $new_line;
1964                     my $col_now = $base_line->get_column($j_terminal_match);
1965
1966                     # Ignore an undefined value as a defensive step; shouldn't
1967                     # normally happen.
1968                     $col_now = 0 unless defined($col_now);
1969
1970                     my $pad = $col_matching_terminal - $col_now;
1971                     my $padding_available =
1972                       $base_line->get_available_space_on_right();
1973                     if ( $col_now && $pad > 0 && $pad <= $padding_available ) {
1974                         $base_line->increase_field_width( $j_terminal_match,
1975                             $pad );
1976                     }
1977
1978                     # do not let sweep_left_to_right change an isolated 'else'
1979                     if ( !$new_line->{'is_terminal_ternary'} ) {
1980                         block_penultimate_match();
1981                     }
1982                 }
1983                 end_rgroup(-1);
1984             }
1985
1986             # end the group if we know we cannot match next line.
1987             elsif ( $new_line->{'end_group'} ) {
1988                 end_rgroup(-1);
1989             }
1990         } ## end loop over lines
1991
1992         end_rgroup(-1);
1993         return ($rgroups);
1994     } ## end sub sweep_top_down
1995 }
1996
1997 sub two_line_pad {
1998
1999     my ( $line_m, $line, $imax_min ) = @_;
2000
2001     # Given:
2002     #  two isolated (list) lines
2003     #  imax_min = number of common alignment tokens
2004     # Return:
2005     #  $pad_max = maximum suggested pad distance
2006     #           = 0 if alignment not recommended
2007     # Note that this is only for two lines which do not have alignment tokens
2008     # in common with any other lines.  It is intended for lists, but it might
2009     # also be used for two non-list lines with a common leading '='.
2010
2011     # Allow alignment if the difference in the two unpadded line lengths
2012     # is not more than either line length.  The idea is to avoid
2013     # aligning lines with very different field lengths, like these two:
2014
2015     #   [
2016     #       'VARCHAR', DBI::SQL_VARCHAR, undef, "'", "'", undef, 0, 1,
2017     #       1, 0, 0, 0, undef, 0, 0
2018     #   ];
2019     my $rfield_lengths   = $line->{'rfield_lengths'};
2020     my $rfield_lengths_m = $line_m->{'rfield_lengths'};
2021
2022     # Safety check - shouldn't happen
2023     return 0
2024       unless $imax_min < @{$rfield_lengths} && $imax_min < @{$rfield_lengths_m};
2025
2026     my $lensum_m = 0;
2027     my $lensum   = 0;
2028     foreach my $i ( 0 .. $imax_min ) {
2029         $lensum_m += $rfield_lengths_m->[$i];
2030         $lensum   += $rfield_lengths->[$i];
2031     }
2032
2033     my ( $lenmin, $lenmax ) =
2034       $lensum >= $lensum_m ? ( $lensum_m, $lensum ) : ( $lensum, $lensum_m );
2035
2036     my $patterns_match;
2037     if ( $line_m->{'list_type'} && $line->{'list_type'} ) {
2038         $patterns_match = 1;
2039         my $rpatterns_m = $line_m->{'rpatterns'};
2040         my $rpatterns   = $line->{'rpatterns'};
2041         foreach my $i ( 0 .. $imax_min ) {
2042             my $pat   = $rpatterns->[$i];
2043             my $pat_m = $rpatterns_m->[$i];
2044             if ( $pat ne $pat_m ) { $patterns_match = 0; last }
2045         }
2046     }
2047
2048     my $pad_max = $lenmax;
2049     if ( !$patterns_match && $lenmax > 2 * $lenmin ) { $pad_max = 0 }
2050
2051     return $pad_max;
2052 } ## end sub two_line_pad
2053
2054 sub sweep_left_to_right {
2055
2056     my ( $rlines, $rgroups, $group_level ) = @_;
2057
2058     # So far we have divided the lines into groups having an equal number of
2059     # identical alignments.  Here we are going to look for common leading
2060     # alignments between the different groups and align them when possible.
2061     # For example, the three lines below are in three groups because each line
2062     # has a different number of commas.  In this routine we will sweep from
2063     # left to right, aligning the leading commas as we go, but stopping if we
2064     # hit the line length limit.
2065
2066     #  my ( $num, $numi, $numj,  $xyza, $ka,   $xyzb, $kb, $aff, $error );
2067     #  my ( $i,   $j,    $error, $aff,  $asum, $avec );
2068     #  my ( $km,  $area, $varea );
2069
2070     # nothing to do if just one group
2071     my $ng_max = @{$rgroups} - 1;
2072     return unless ( $ng_max > 0 );
2073
2074     #---------------------------------------------------------------------
2075     # Step 1: Loop over groups to find all common leading alignment tokens
2076     #---------------------------------------------------------------------
2077
2078     my $line;
2079     my $rtokens;
2080     my $imax;     # index of maximum non-side-comment alignment token
2081     my $istop;    # an optional stopping index
2082     my $jbeg;     # starting line index
2083     my $jend;     # ending line index
2084
2085     my $line_m;
2086     my $rtokens_m;
2087     my $imax_m;
2088     my $istop_m;
2089     my $jbeg_m;
2090     my $jend_m;
2091
2092     my $istop_mm;
2093
2094     # Look at neighboring pairs of groups and form a simple list
2095     # of all common leading alignment tokens. Foreach such match we
2096     # store [$i, $ng], where
2097     #  $i = index of the token in the line (0,1,...)
2098     #  $ng is the second of the two groups with this common token
2099     my @icommon;
2100
2101     # Hash to hold the maximum alignment change for any group
2102     my %max_move;
2103
2104     # a small number of columns
2105     my $short_pad = 4;
2106
2107     my $ng = -1;
2108     foreach my $item ( @{$rgroups} ) {
2109         $ng++;
2110
2111         $istop_mm = $istop_m;
2112
2113         # save _m values of previous group
2114         $line_m    = $line;
2115         $rtokens_m = $rtokens;
2116         $imax_m    = $imax;
2117         $istop_m   = $istop;
2118         $jbeg_m    = $jbeg;
2119         $jend_m    = $jend;
2120
2121         # Get values for this group. Note that we just have to use values for
2122         # one of the lines of the group since all members have the same
2123         # alignments.
2124         ( $jbeg, $jend, $istop ) = @{$item};
2125
2126         $line    = $rlines->[$jbeg];
2127         $rtokens = $line->{'rtokens'};
2128         $imax    = $line->{'jmax'} - 2;
2129         $istop   = -1 unless ( defined($istop) );
2130         $istop   = $imax if ( $istop > $imax );
2131
2132         # Initialize on first group
2133         next if ( $ng == 0 );
2134
2135         # Use the minimum index limit of the two groups
2136         my $imax_min = $imax > $imax_m ? $imax_m : $imax;
2137
2138         # Also impose a limit if given.
2139         if ( $istop_m < $imax_min ) {
2140             $imax_min = $istop_m;
2141         }
2142
2143         # Special treatment of two one-line groups isolated from other lines,
2144         # unless they form a simple list or a terminal match.  Otherwise the
2145         # alignment can look strange in some cases.
2146         my $list_type = $rlines->[$jbeg]->{'list_type'};
2147         if (
2148                $jend == $jbeg
2149             && $jend_m == $jbeg_m
2150             && ( $ng == 1 || $istop_mm < 0 )
2151             && ( $ng == $ng_max || $istop < 0 )
2152             && !$line->{'j_terminal_match'}
2153
2154             # Only do this for imperfect matches. This is normally true except
2155             # when two perfect matches cannot form a group because the line
2156             # length limit would be exceeded. In that case we can still try
2157             # to match as many alignments as possible.
2158             && ( $imax != $imax_m || $istop_m != $imax_m )
2159           )
2160         {
2161
2162             # We will just align assignments and simple lists
2163             next unless ( $imax_min >= 0 );
2164             next
2165               unless ( $rtokens->[0] =~ /^=\d/
2166                 || $list_type );
2167
2168             # In this case we will limit padding to a short distance.  This
2169             # is a compromise to keep some vertical alignment but prevent large
2170             # gaps, which do not look good for just two lines.
2171             my $pad_max =
2172               two_line_pad( $rlines->[$jbeg], $rlines->[$jbeg_m], $imax_min );
2173             next unless ($pad_max);
2174             my $ng_m = $ng - 1;
2175             $max_move{"$ng_m"} = $pad_max;
2176             $max_move{"$ng"}   = $pad_max;
2177         }
2178
2179         # Loop to find all common leading tokens.
2180         if ( $imax_min >= 0 ) {
2181             foreach my $i ( 0 .. $imax_min ) {
2182                 my $tok   = $rtokens->[$i];
2183                 my $tok_m = $rtokens_m->[$i];
2184                 last if ( $tok ne $tok_m );
2185                 push @icommon, [ $i, $ng, $tok ];
2186             }
2187         }
2188     }
2189     return unless @icommon;
2190
2191     #----------------------------------------------------------
2192     # Step 2: Reorder and consolidate the list into a task list
2193     #----------------------------------------------------------
2194
2195     # We have to work first from lowest token index to highest, then by group,
2196     # sort our list first on token index then group number
2197     @icommon = sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @icommon;
2198
2199     # Make a task list of the form
2200     #   [$i, ng_beg, $ng_end, $tok], ..
2201     # where
2202     #   $i is the index of the token to be aligned
2203     #   $ng_beg..$ng_end is the group range for this action
2204     my @todo;
2205     my ( $i, $ng_end, $tok );
2206     foreach my $item (@icommon) {
2207         my $ng_last = $ng_end;
2208         my $i_last  = $i;
2209         ( $i, $ng_end, $tok ) = @{$item};
2210         my $ng_beg = $ng_end - 1;
2211         if ( defined($ng_last) && $ng_beg == $ng_last && $i == $i_last ) {
2212             my $var = pop(@todo);
2213             $ng_beg = $var->[1];
2214         }
2215         my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
2216         push @todo, [ $i, $ng_beg, $ng_end, $raw_tok, $lev ];
2217     }
2218
2219     #------------------------------
2220     # Step 3: Execute the task list
2221     #------------------------------
2222     do_left_to_right_sweep( $rlines, $rgroups, \@todo, \%max_move, $short_pad,
2223         $group_level );
2224     return;
2225 } ## end sub sweep_left_to_right
2226
2227 {    ## closure for sub do_left_to_right_sweep
2228
2229     my %is_good_alignment_token;
2230
2231     BEGIN {
2232
2233         # One of the most difficult aspects of vertical alignment is knowing
2234         # when not to align.  Alignment can go from looking very nice to very
2235         # bad when overdone.  In the sweep algorithm there are two special
2236         # cases where we may need to limit padding to a '$short_pad' distance
2237         # to avoid some very ugly formatting:
2238
2239         # 1. Two isolated lines with partial alignment
2240         # 2. A 'tail-wag-dog' situation, in which a single terminal
2241         #    line with partial alignment could cause a significant pad
2242         #    increase in many previous lines if allowed to join the alignment.
2243
2244         # For most alignment tokens, we will allow only a small pad to be
2245         # introduced (the hardwired $short_pad variable) . But for some 'good'
2246         # alignments we can be less restrictive.
2247
2248         # These are 'good' alignments, which are allowed more padding:
2249         my @q = qw(
2250           => = ? if unless or || {
2251         );
2252         push @q, ',';
2253         @is_good_alignment_token{@q} = (0) x scalar(@q);
2254
2255         # Promote a few of these to 'best', with essentially no pad limit:
2256         $is_good_alignment_token{'='}      = 1;
2257         $is_good_alignment_token{'if'}     = 1;
2258         $is_good_alignment_token{'unless'} = 1;
2259         $is_good_alignment_token{'=>'}     = 1
2260
2261           # Note the hash values are set so that:
2262           #         if ($is_good_alignment_token{$raw_tok}) => best
2263           # if defined ($is_good_alignment_token{$raw_tok}) => good or best
2264
2265     } ## end BEGIN
2266
2267     sub move_to_common_column {
2268
2269         # This is a sub called by sub do_left_to_right_sweep to
2270         # move the alignment column of token $itok to $col_want for a
2271         # sequence of groups.
2272         my ( $rlines, $rgroups, $rmax_move, $ngb, $nge, $itok, $col_want,
2273             $raw_tok )
2274           = @_;
2275         return unless ( defined($ngb) && $nge > $ngb );
2276         foreach my $ng ( $ngb .. $nge ) {
2277
2278             my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
2279             my $line = $rlines->[$jbeg];
2280             my $col  = $line->get_column($itok);
2281             my $move = $col_want - $col;
2282             if ( $move > 0 ) {
2283
2284                 # limit padding increase in isolated two lines
2285                 next
2286                   if ( defined( $rmax_move->{$ng} )
2287                     && $move > $rmax_move->{$ng}
2288                     && !$is_good_alignment_token{$raw_tok} );
2289
2290                 $line->increase_field_width( $itok, $move );
2291             }
2292             elsif ( $move < 0 ) {
2293
2294                 # spot to take special action on failure to move
2295             }
2296         }
2297         return;
2298     } ## end sub move_to_common_column
2299
2300     sub do_left_to_right_sweep {
2301         my ( $rlines, $rgroups, $rtodo, $rmax_move, $short_pad, $group_level )
2302           = @_;
2303
2304         # $blocking_level[$nj is the level at a match failure between groups
2305         # $ng-1 and $ng
2306         my @blocking_level;
2307         my $group_list_type = $rlines->[0]->{'list_type'};
2308
2309         foreach my $task ( @{$rtodo} ) {
2310             my ( $itok, $ng_beg, $ng_end, $raw_tok, $lev ) = @{$task};
2311
2312             # Nothing to do for a single group
2313             next unless ( $ng_end > $ng_beg );
2314
2315             my $ng_first;  # index of the first group of a continuous sequence
2316             my $col_want;  # the common alignment column of a sequence of groups
2317             my $col_limit; # maximum column before bumping into max line length
2318             my $line_count_ng_m = 0;
2319             my $jmax_m;
2320             my $it_stop_m;
2321
2322             # Loop over the groups
2323             # 'ix_' = index in the array of lines
2324             # 'ng_' = index in the array of groups
2325             # 'it_' = index in the array of tokens
2326             my $ix_min      = $rgroups->[$ng_beg]->[0];
2327             my $ix_max      = $rgroups->[$ng_end]->[1];
2328             my $lines_total = $ix_max - $ix_min + 1;
2329             foreach my $ng ( $ng_beg .. $ng_end ) {
2330                 my ( $ix_beg, $ix_end, $it_stop ) = @{ $rgroups->[$ng] };
2331                 my $line_count_ng = $ix_end - $ix_beg + 1;
2332
2333                 # Important: note that since all lines in a group have a common
2334                 # alignments object, we just have to work on one of the lines
2335                 # (the first line).  All of the rest will be changed
2336                 # automatically.
2337                 my $line = $rlines->[$ix_beg];
2338                 my $jmax = $line->{'jmax'};
2339
2340                 # the maximum space without exceeding the line length:
2341                 my $avail   = $line->get_available_space_on_right();
2342                 my $col     = $line->get_column($itok);
2343                 my $col_max = $col + $avail;
2344
2345                 # Initialize on first group
2346                 if ( !defined($col_want) ) {
2347                     $ng_first        = $ng;
2348                     $col_want        = $col;
2349                     $col_limit       = $col_max;
2350                     $line_count_ng_m = $line_count_ng;
2351                     $jmax_m          = $jmax;
2352                     $it_stop_m       = $it_stop;
2353                     next;
2354                 }
2355
2356                 # RULE: Throw a blocking flag upon encountering a token level
2357                 # different from the level of the first blocking token.  For
2358                 # example, in the following example, if the = matches get
2359                 # blocked between two groups as shown, then we want to start
2360                 # blocking matches at the commas, which are at deeper level, so
2361                 # that we do not get the big gaps shown here:
2362
2363                 #  my $unknown3 = pack( "v",          -2 );
2364                 #  my $unknown4 = pack( "v",          0x09 );
2365                 #  my $unknown5 = pack( "VVV",        0x06, 0x00, 0x00 );
2366                 #  my $num_bbd_blocks  = pack( "V",   $num_lists );
2367                 #  my $root_startblock = pack( "V",   $root_start );
2368                 #  my $unknown6        = pack( "VV",  0x00, 0x1000 );
2369
2370                 # On the other hand, it is okay to keep matching at the same
2371                 # level such as in a simple list of commas and/or fat commas.
2372
2373                 my $is_blocked = defined( $blocking_level[$ng] )
2374                   && $lev > $blocking_level[$ng];
2375
2376                 # TAIL-WAG-DOG RULE: prevent a 'tail-wag-dog' syndrom, meaning:
2377                 # Do not let one or two lines with a **different number of
2378                 # alignments** open up a big gap in a large block.  For
2379                 # example, we will prevent something like this, where the first
2380                 # line pries open the rest:
2381
2382             #  $worksheet->write( "B7", "http://www.perl.com", undef, $format );
2383             #  $worksheet->write( "C7", "",                    $format );
2384             #  $worksheet->write( "D7", "",                    $format );
2385             #  $worksheet->write( "D8", "",                    $format );
2386             #  $worksheet->write( "D8", "",                    $format );
2387
2388                 # We should exclude from consideration two groups which are
2389                 # effectively the same but separated because one does not
2390                 # fit in the maximum allowed line length.
2391                 my $is_same_group =
2392                   $jmax == $jmax_m && $it_stop_m == $jmax_m - 2;
2393
2394                 my $lines_above = $ix_beg - $ix_min;
2395                 my $lines_below = $lines_total - $lines_above;
2396
2397                 # Increase the tolerable gap for certain favorable factors
2398                 my $factor    = 1;
2399                 my $top_level = $lev == $group_level;
2400
2401                 # Align best top level alignment tokens like '=', 'if', ...
2402                 # A factor of 10 allows a gap of up to 40 spaces
2403                 if ( $top_level && $is_good_alignment_token{$raw_tok} ) {
2404                     $factor = 10;
2405                 }
2406
2407                 # Otherwise allow some minimal padding of good alignments
2408                 elsif (
2409
2410                     defined( $is_good_alignment_token{$raw_tok} )
2411
2412                     # We have to be careful if there are just 2 lines.  This
2413                     # two-line factor allows large gaps only for 2 lines which
2414                     # are simple lists with fewer items on the second line. It
2415                     # gives results similar to previous versions of perltidy.
2416                     && (   $lines_total > 2
2417                         || $group_list_type && $jmax < $jmax_m && $top_level )
2418                   )
2419                 {
2420                     $factor += 1;
2421                     if ($top_level) {
2422                         $factor += 1;
2423                     }
2424                 }
2425
2426                 my $is_big_gap;
2427                 if ( !$is_same_group ) {
2428                     $is_big_gap ||=
2429                       (      $lines_above == 1
2430                           || $lines_above == 2 && $lines_below >= 4 )
2431                       && $col_want > $col + $short_pad * $factor;
2432                     $is_big_gap ||=
2433                       (      $lines_below == 1
2434                           || $lines_below == 2 && $lines_above >= 4 )
2435                       && $col > $col_want + $short_pad * $factor;
2436                 }
2437
2438                 # if match is limited by gap size, stop aligning at this level
2439                 if ($is_big_gap) {
2440                     $blocking_level[$ng] = $lev - 1;
2441                 }
2442
2443                 # quit and restart if it cannot join this batch
2444                 if (   $col_want > $col_max
2445                     || $col > $col_limit
2446                     || $is_big_gap
2447                     || $is_blocked )
2448                 {
2449
2450                     # remember the level of the first blocking token
2451                     if ( !defined( $blocking_level[$ng] ) ) {
2452                         $blocking_level[$ng] = $lev;
2453                     }
2454
2455                     move_to_common_column(
2456                         $rlines, $rgroups, $rmax_move, $ng_first,
2457                         $ng - 1, $itok,    $col_want,  $raw_tok
2458                     );
2459                     $ng_first        = $ng;
2460                     $col_want        = $col;
2461                     $col_limit       = $col_max;
2462                     $line_count_ng_m = $line_count_ng;
2463                     $jmax_m          = $jmax;
2464                     $it_stop_m       = $it_stop;
2465                     next;
2466                 }
2467
2468                 $line_count_ng_m += $line_count_ng;
2469
2470                 # update the common column and limit
2471                 if ( $col > $col_want )      { $col_want  = $col }
2472                 if ( $col_max < $col_limit ) { $col_limit = $col_max }
2473
2474             } ## end loop over groups
2475
2476             if ( $ng_end > $ng_first ) {
2477                 move_to_common_column(
2478                     $rlines, $rgroups, $rmax_move, $ng_first,
2479                     $ng_end, $itok,    $col_want,  $raw_tok
2480                 );
2481             } ## end loop over groups for one task
2482         } ## end loop over tasks
2483
2484         return;
2485     } ## end sub do_left_to_right_sweep
2486 }
2487
2488 sub delete_selected_tokens {
2489
2490     my ( $line_obj, $ridel ) = @_;
2491
2492     # $line_obj    is the line to be modified
2493     # $ridel       is a ref to list of indexes to be deleted
2494
2495     # remove an unused alignment token(s) to improve alignment chances
2496
2497     return unless ( defined($line_obj) && defined($ridel) && @{$ridel} );
2498
2499     my $jmax_old           = $line_obj->{'jmax'};
2500     my $rfields_old        = $line_obj->{'rfields'};
2501     my $rfield_lengths_old = $line_obj->{'rfield_lengths'};
2502     my $rpatterns_old      = $line_obj->{'rpatterns'};
2503     my $rtokens_old        = $line_obj->{'rtokens'};
2504     my $j_terminal_match   = $line_obj->{'j_terminal_match'};
2505
2506     use constant EXPLAIN_DELETE_SELECTED => 0;
2507
2508     local $LIST_SEPARATOR = '> <';
2509     EXPLAIN_DELETE_SELECTED && print <<EOM;
2510 delete indexes: <@{$ridel}>
2511 old jmax: $jmax_old
2512 old tokens: <@{$rtokens_old}>
2513 old patterns: <@{$rpatterns_old}>
2514 old fields: <@{$rfields_old}>
2515 old field_lengths: <@{$rfield_lengths_old}>
2516 EOM
2517
2518     my $rfields_new        = [];
2519     my $rpatterns_new      = [];
2520     my $rtokens_new        = [];
2521     my $rfield_lengths_new = [];
2522
2523     # Convert deletion list to a hash to allow any order, multiple entries,
2524     # and avoid problems with index values out of range
2525     my %delete_me;
2526     @delete_me{ @{$ridel} } = (1) x scalar( @{$ridel} );
2527
2528     my $pattern_0      = $rpatterns_old->[0];
2529     my $field_0        = $rfields_old->[0];
2530     my $field_length_0 = $rfield_lengths_old->[0];
2531     push @{$rfields_new},        $field_0;
2532     push @{$rfield_lengths_new}, $field_length_0;
2533     push @{$rpatterns_new},      $pattern_0;
2534
2535     # Loop to either copy items or concatenate fields and patterns
2536     my $jmin_del;
2537     foreach my $j ( 0 .. $jmax_old - 1 ) {
2538         my $token        = $rtokens_old->[$j];
2539         my $field        = $rfields_old->[ $j + 1 ];
2540         my $field_length = $rfield_lengths_old->[ $j + 1 ];
2541         my $pattern      = $rpatterns_old->[ $j + 1 ];
2542         if ( !$delete_me{$j} ) {
2543             push @{$rtokens_new},        $token;
2544             push @{$rfields_new},        $field;
2545             push @{$rpatterns_new},      $pattern;
2546             push @{$rfield_lengths_new}, $field_length;
2547         }
2548         else {
2549             if ( !defined($jmin_del) ) { $jmin_del = $j }
2550             $rfields_new->[-1] .= $field;
2551             $rfield_lengths_new->[-1] += $field_length;
2552             $rpatterns_new->[-1] .= $pattern;
2553         }
2554     }
2555
2556     # ----- x ------ x ------ x ------
2557     #t      0        1        2        <- token indexing
2558     #f   0      1        2        3    <- field and pattern
2559
2560     my $jmax_new = @{$rfields_new} - 1;
2561     $line_obj->{'rtokens'}        = $rtokens_new;
2562     $line_obj->{'rpatterns'}      = $rpatterns_new;
2563     $line_obj->{'rfields'}        = $rfields_new;
2564     $line_obj->{'rfield_lengths'} = $rfield_lengths_new;
2565     $line_obj->{'jmax'}           = $jmax_new;
2566
2567     # The value of j_terminal_match will be incorrect if we delete tokens prior
2568     # to it. We will have to give up on aligning the terminal tokens if this
2569     # happens.
2570     if ( defined($j_terminal_match) && $jmin_del <= $j_terminal_match ) {
2571         $line_obj->{'j_terminal_match'} = undef;
2572     }
2573
2574     # update list type -
2575     if ( $line_obj->{'list_seqno'} ) {
2576
2577         ## This works, but for efficiency see if we need to make a change:
2578         ## decide_if_list($line_obj);
2579
2580         # An existing list will still be a list but with possibly different
2581         # leading token
2582         my $old_list_type = $line_obj->{'list_type'};
2583         my $new_list_type = EMPTY_STRING;
2584         if ( $rtokens_new->[0] =~ /^(=>|,)/ ) {
2585             $new_list_type = $rtokens_new->[0];
2586         }
2587         if ( !$old_list_type || $old_list_type ne $new_list_type ) {
2588             decide_if_list($line_obj);
2589         }
2590     }
2591
2592     EXPLAIN_DELETE_SELECTED && print <<EOM;
2593
2594 new jmax: $jmax_new
2595 new tokens: <@{$rtokens_new}>
2596 new patterns: <@{$rpatterns_new}>
2597 new fields: <@{$rfields_new}>
2598 EOM
2599     return;
2600 } ## end sub delete_selected_tokens
2601
2602 {    ## closure for sub decode_alignment_token
2603
2604     # This routine is called repeatedly for each token, so it needs to be
2605     # efficient.  We can speed things up by remembering the inputs and outputs
2606     # in a hash.
2607     my %decoded_token;
2608
2609     sub initialize_decode {
2610
2611         # We will re-initialize the hash for each file. Otherwise, there is
2612         # a danger that the hash can become arbitrarily large if a very large
2613         # number of files is processed at once.
2614         %decoded_token = ();
2615         return;
2616     } ## end sub initialize_decode
2617
2618     sub decode_alignment_token {
2619
2620         # Unpack the values packed in an alignment token
2621         #
2622         # Usage:
2623         #        my ( $raw_tok, $lev, $tag, $tok_count ) =
2624         #          decode_alignment_token($token);
2625
2626         # Alignment tokens have a trailing decimal level and optional tag (for
2627         # commas):
2628         # For example, the first comma in the following line
2629         #     sub banner  { crlf; report( shift, '/', shift ); crlf }
2630         # is decorated as follows:
2631         #    ,2+report-6  => (tok,lev,tag) =qw( ,   2   +report-6)
2632
2633         # An optional token count may be appended with a leading dot.
2634         # Currently this is only done for '=' tokens but this could change.
2635         # For example, consider the following line:
2636         #   $nport   = $port = shift || $name;
2637         # The first '=' may either be '=0' or '=0.1' [level 0, first equals]
2638         # The second '=' will be '=0.2' [level 0, second equals]
2639         my ($tok) = @_;
2640
2641         if ( defined( $decoded_token{$tok} ) ) {
2642             return @{ $decoded_token{$tok} };
2643         }
2644
2645         my ( $raw_tok, $lev, $tag, $tok_count ) = ( $tok, 0, EMPTY_STRING, 1 );
2646         if ( $tok =~ /^(\D+)(\d+)([^\.]*)(\.(\d+))?$/ ) {
2647             $raw_tok   = $1;
2648             $lev       = $2;
2649             $tag       = $3 if ($3);
2650             $tok_count = $5 if ($5);
2651         }
2652         my @vals = ( $raw_tok, $lev, $tag, $tok_count );
2653         $decoded_token{$tok} = \@vals;
2654         return @vals;
2655     } ## end sub decode_alignment_token
2656 }
2657
2658 {    ## closure for sub delete_unmatched_tokens
2659
2660     my %is_assignment;
2661     my %keep_after_deleted_assignment;
2662
2663     BEGIN {
2664         my @q;
2665
2666         @q = qw(
2667           = **= += *= &= <<= &&=
2668           -= /= |= >>= ||= //=
2669           .= %= ^=
2670           x=
2671         );
2672         @is_assignment{@q} = (1) x scalar(@q);
2673
2674         # These tokens may be kept following an = deletion
2675         @q = qw(
2676           if unless or ||
2677         );
2678         @keep_after_deleted_assignment{@q} = (1) x scalar(@q);
2679
2680     } ## end BEGIN
2681
2682     sub delete_unmatched_tokens {
2683         my ( $rlines, $group_level ) = @_;
2684
2685         # This is a important first step in vertical alignment in which
2686         # we remove as many obviously un-needed alignment tokens as possible.
2687         # This will prevent them from interfering with the final alignment.
2688
2689         # Returns:
2690         my $max_lev_diff     = 0;    # used to avoid a call to prune_tree
2691         my $saw_side_comment = 0;    # used to avoid a call for side comments
2692
2693         # Handle no lines -- shouldn't happen
2694         return unless @{$rlines};
2695
2696         # Handle a single line
2697         if ( @{$rlines} == 1 ) {
2698             my $line   = $rlines->[0];
2699             my $jmax   = $line->{'jmax'};
2700             my $length = $line->{'rfield_lengths'}->[$jmax];
2701             $saw_side_comment = $length > 0;
2702             return ( $max_lev_diff, $saw_side_comment );
2703         }
2704
2705         # ignore hanging side comments in these operations
2706         my @filtered   = grep { !$_->{'is_hanging_side_comment'} } @{$rlines};
2707         my $rnew_lines = \@filtered;
2708
2709         $saw_side_comment = @filtered != @{$rlines};
2710         $max_lev_diff     = 0;
2711
2712         # nothing to do if all lines were hanging side comments
2713         my $jmax = @{$rnew_lines} - 1;
2714         return ( $max_lev_diff, $saw_side_comment ) unless ( $jmax >= 0 );
2715
2716         #----------------------------------------------------
2717         # Create a hash of alignment token info for each line
2718         #----------------------------------------------------
2719         ( my $rline_hashes, my $requals_info, $saw_side_comment, $max_lev_diff )
2720           = make_alignment_info( $group_level, $rnew_lines, $saw_side_comment );
2721
2722         #------------------------------------------------------------
2723         # Find independent subgroups of lines.  Neighboring subgroups
2724         # do not have a common alignment token.
2725         #------------------------------------------------------------
2726         my @subgroups;
2727         push @subgroups, [ 0, $jmax ];
2728         foreach my $jl ( 0 .. $jmax - 1 ) {
2729             if ( $rnew_lines->[$jl]->{'end_group'} ) {
2730                 $subgroups[-1]->[1] = $jl;
2731                 push @subgroups, [ $jl + 1, $jmax ];
2732             }
2733         }
2734
2735         #-----------------------------------------------------------
2736         # PASS 1 over subgroups to remove unmatched alignment tokens
2737         #-----------------------------------------------------------
2738         delete_unmatched_tokens_main_loop(
2739             $group_level,  $rnew_lines, \@subgroups,
2740             $rline_hashes, $requals_info
2741         );
2742
2743         #----------------------------------------------------------------
2744         # PASS 2: Construct a tree of matched lines and delete some small
2745         # deeper levels of tokens.  They also block good alignments.
2746         #----------------------------------------------------------------
2747         prune_alignment_tree($rnew_lines) if ($max_lev_diff);
2748
2749         #--------------------------------------------
2750         # PASS 3: compare all lines for common tokens
2751         #--------------------------------------------
2752         match_line_pairs( $rlines, $rnew_lines, \@subgroups, $group_level );
2753
2754         return ( $max_lev_diff, $saw_side_comment );
2755     } ## end sub delete_unmatched_tokens
2756
2757     sub make_alignment_info {
2758
2759         my ( $group_level, $rnew_lines, $saw_side_comment ) = @_;
2760
2761         #------------------------------------------------------------
2762         # Loop to create a hash of alignment token info for each line
2763         #------------------------------------------------------------
2764         my $rline_hashes = [];
2765         my @equals_info;
2766         my @line_info;    # no longer used
2767         my $jmax         = @{$rnew_lines} - 1;
2768         my $max_lev_diff = 0;
2769         foreach my $line ( @{$rnew_lines} ) {
2770             my $rhash     = {};
2771             my $rtokens   = $line->{'rtokens'};
2772             my $rpatterns = $line->{'rpatterns'};
2773             my $i         = 0;
2774             my ( $i_eq, $tok_eq, $pat_eq );
2775             my ( $lev_min, $lev_max );
2776             foreach my $tok ( @{$rtokens} ) {
2777                 my ( $raw_tok, $lev, $tag, $tok_count ) =
2778                   decode_alignment_token($tok);
2779
2780                 if ( $tok ne '#' ) {
2781                     if ( !defined($lev_min) ) {
2782                         $lev_min = $lev;
2783                         $lev_max = $lev;
2784                     }
2785                     else {
2786                         if ( $lev < $lev_min ) { $lev_min = $lev }
2787                         if ( $lev > $lev_max ) { $lev_max = $lev }
2788                     }
2789                 }
2790                 else {
2791                     if ( !$saw_side_comment ) {
2792                         my $length = $line->{'rfield_lengths'}->[ $i + 1 ];
2793                         $saw_side_comment ||= $length;
2794                     }
2795                 }
2796
2797                 # Possible future upgrade: for multiple matches,
2798                 # record [$i1, $i2, ..] instead of $i
2799                 $rhash->{$tok} =
2800                   [ $i, undef, undef, $raw_tok, $lev, $tag, $tok_count ];
2801
2802                 # remember the first equals at line level
2803                 if ( !defined($i_eq) && $raw_tok eq '=' ) {
2804
2805                     if ( $lev eq $group_level ) {
2806                         $i_eq   = $i;
2807                         $tok_eq = $tok;
2808                         $pat_eq = $rpatterns->[$i];
2809                     }
2810                 }
2811                 $i++;
2812             }
2813             push @{$rline_hashes}, $rhash;
2814             push @equals_info,     [ $i_eq,    $tok_eq, $pat_eq ];
2815             push @line_info,       [ $lev_min, $lev_max ];
2816             if ( defined($lev_min) ) {
2817                 my $lev_diff = $lev_max - $lev_min;
2818                 if ( $lev_diff > $max_lev_diff ) { $max_lev_diff = $lev_diff }
2819             }
2820         }
2821
2822         #----------------------------------------------------
2823         # Loop to compare each line pair and remember matches
2824         #----------------------------------------------------
2825         my $rtok_hash = {};
2826         my $nr        = 0;
2827         foreach my $jl ( 0 .. $jmax - 1 ) {
2828             my $nl = $nr;
2829             $nr = 0;
2830             my $jr      = $jl + 1;
2831             my $rhash_l = $rline_hashes->[$jl];
2832             my $rhash_r = $rline_hashes->[$jr];
2833             foreach my $tok ( keys %{$rhash_l} ) {
2834                 if ( defined( $rhash_r->{$tok} ) ) {
2835                     my $il = $rhash_l->{$tok}->[0];
2836                     my $ir = $rhash_r->{$tok}->[0];
2837                     $rhash_l->{$tok}->[2] = $ir;
2838                     $rhash_r->{$tok}->[1] = $il;
2839                     if ( $tok ne '#' ) {
2840                         push @{ $rtok_hash->{$tok} }, ( $jl, $jr );
2841                         $nr++;
2842                     }
2843                 }
2844             }
2845
2846             # Set a line break if no matching tokens between these lines
2847             # (this is not strictly necessary now but does not hurt)
2848             if ( $nr == 0 && $nl > 0 ) {
2849                 $rnew_lines->[$jl]->{'end_group'} = 1;
2850             }
2851
2852             # Also set a line break if both lines have simple equals but with
2853             # different leading characters in patterns.  This check is similar
2854             # to one in sub check_match, and will prevent sub
2855             # prune_alignment_tree from removing alignments which otherwise
2856             # should be kept. This fix is rarely needed, but it can
2857             # occasionally improve formatting.
2858             # For example:
2859             #     my $name = $this->{Name};
2860             #     $type = $this->ctype($genlooptype) if defined $genlooptype;
2861             #     my $declini = ( $asgnonly ? ""          : "\t$type *" );
2862             #     my $cast    = ( $type     ? "($type *)" : "" );
2863             # The last two lines start with 'my' and will not match the
2864             # previous line starting with $type, so we do not want
2865             # prune_alignment tree to delete their ? : alignments at a deeper
2866             # level.
2867             my ( $i_eq_l, $tok_eq_l, $pat_eq_l ) = @{ $equals_info[$jl] };
2868             my ( $i_eq_r, $tok_eq_r, $pat_eq_r ) = @{ $equals_info[$jr] };
2869             if ( defined($i_eq_l) && defined($i_eq_r) ) {
2870
2871                 # Also, do not align equals across a change in ci level
2872                 my $ci_jump = $rnew_lines->[$jl]->{'ci_level'} !=
2873                   $rnew_lines->[$jr]->{'ci_level'};
2874
2875                 if (
2876                        $tok_eq_l eq $tok_eq_r
2877                     && $i_eq_l == 0
2878                     && $i_eq_r == 0
2879                     && ( substr( $pat_eq_l, 0, 1 ) ne substr( $pat_eq_r, 0, 1 )
2880                         || $ci_jump )
2881                   )
2882                 {
2883                     $rnew_lines->[$jl]->{'end_group'} = 1;
2884                 }
2885             }
2886         }
2887         return ( $rline_hashes, \@equals_info, $saw_side_comment,
2888             $max_lev_diff );
2889     } ## end sub make_alignment_info
2890
2891     sub delete_unmatched_tokens_main_loop {
2892
2893         my (
2894             $group_level,  $rnew_lines, $rsubgroups,
2895             $rline_hashes, $requals_info
2896         ) = @_;
2897
2898         #--------------------------------------------------------------
2899         # Main loop over subgroups to remove unmatched alignment tokens
2900         #--------------------------------------------------------------
2901
2902         # flag to allow skipping pass 2 - not currently used
2903         my $saw_large_group;
2904
2905         my $has_terminal_match = $rnew_lines->[-1]->{'j_terminal_match'};
2906
2907         foreach my $item ( @{$rsubgroups} ) {
2908             my ( $jbeg, $jend ) = @{$item};
2909
2910             my $nlines = $jend - $jbeg + 1;
2911
2912             #---------------------------------------------------
2913             # Look for complete if/elsif/else and ternary blocks
2914             #---------------------------------------------------
2915
2916             # We are looking for a common '$dividing_token' like these:
2917
2918             #    if    ( $b and $s ) { $p->{'type'} = 'a'; }
2919             #    elsif ($b)          { $p->{'type'} = 'b'; }
2920             #    elsif ($s)          { $p->{'type'} = 's'; }
2921             #    else                { $p->{'type'} = ''; }
2922             #                        ^----------- dividing_token
2923
2924             #   my $severity =
2925             #      !$routine                     ? '[PFX]'
2926             #     : $routine =~ /warn.*_d\z/     ? '[DS]'
2927             #     : $routine =~ /ck_warn/        ? 'W'
2928             #     : $routine =~ /ckWARN\d*reg_d/ ? 'S'
2929             #     : $routine =~ /ckWARN\d*reg/   ? 'W'
2930             #     : $routine =~ /vWARN\d/        ? '[WDS]'
2931             #     :                                '[PFX]';
2932             #                                    ^----------- dividing_token
2933
2934             # Only look for groups which are more than 2 lines long.  Two lines
2935             # can get messed up doing this, probably due to the various
2936             # two-line rules.
2937
2938             my $dividing_token;
2939             my %token_line_count;
2940             if ( $nlines > 2 ) {
2941
2942                 foreach my $jj ( $jbeg .. $jend ) {
2943                     my %seen;
2944                     my $line    = $rnew_lines->[$jj];
2945                     my $rtokens = $line->{'rtokens'};
2946                     foreach my $tok ( @{$rtokens} ) {
2947                         if ( !$seen{$tok} ) {
2948                             $seen{$tok}++;
2949                             $token_line_count{$tok}++;
2950                         }
2951                     }
2952                 }
2953
2954                 foreach my $tok ( keys %token_line_count ) {
2955                     if ( $token_line_count{$tok} == $nlines ) {
2956                         if (   substr( $tok, 0, 1 ) eq '?'
2957                             || substr( $tok, 0, 1 ) eq '{'
2958                             && $tok =~ /^\{\d+if/ )
2959                         {
2960                             $dividing_token = $tok;
2961                             last;
2962                         }
2963                     }
2964                 }
2965             }
2966
2967             #-------------------------------------------------------------
2968             # Loop over subgroup lines to remove unwanted alignment tokens
2969             #-------------------------------------------------------------
2970             foreach my $jj ( $jbeg .. $jend ) {
2971                 my $line    = $rnew_lines->[$jj];
2972                 my $rtokens = $line->{'rtokens'};
2973                 my $rhash   = $rline_hashes->[$jj];
2974                 my $i_eq    = $requals_info->[$jj]->[0];
2975                 my @idel;
2976                 my $imax = @{$rtokens} - 2;
2977                 my $delete_above_level;
2978                 my $deleted_assignment_token;
2979
2980                 my $saw_dividing_token = EMPTY_STRING;
2981                 $saw_large_group ||= $nlines > 2 && $imax > 1;
2982
2983                 # Loop over all alignment tokens
2984                 foreach my $i ( 0 .. $imax ) {
2985                     my $tok = $rtokens->[$i];
2986                     next if ( $tok eq '#' );    # shouldn't happen
2987                     my ( $iii, $il, $ir, $raw_tok, $lev, $tag, $tok_count ) =
2988                       @{ $rhash->{$tok} };
2989
2990                     #------------------------------------------------------
2991                     # Here is the basic RULE: remove an unmatched alignment
2992                     # which does not occur in the surrounding lines.
2993                     #------------------------------------------------------
2994                     my $delete_me = !defined($il) && !defined($ir);
2995
2996                     # Apply any user controls. Note that not all lines pass
2997                     # this way so they have to be applied elsewhere too.
2998                     my $align_ok = 1;
2999                     if (%valign_control_hash) {
3000                         $align_ok = $valign_control_hash{$raw_tok};
3001                         $align_ok = $valign_control_default
3002                           unless defined($align_ok);
3003                         $delete_me ||= !$align_ok;
3004                     }
3005
3006                     # But now we modify this with exceptions...
3007
3008                     # EXCEPTION 1: If we are in a complete ternary or
3009                     # if/elsif/else group, and this token is not on every line
3010                     # of the group, should we delete it to preserve overall
3011                     # alignment?
3012                     if ($dividing_token) {
3013                         if ( $token_line_count{$tok} >= $nlines ) {
3014                             $saw_dividing_token ||= $tok eq $dividing_token;
3015                         }
3016                         else {
3017
3018                             # For shorter runs, delete toks to save alignment.
3019                             # For longer runs, keep toks after the '{' or '?'
3020                             # to allow sub-alignments within braces.  The
3021                             # number 5 lines is arbitrary but seems to work ok.
3022                             $delete_me ||=
3023                               ( $nlines < 5 || !$saw_dividing_token );
3024                         }
3025                     }
3026
3027                     # EXCEPTION 2: Remove all tokens above a certain level
3028                     # following a previous deletion.  For example, we have to
3029                     # remove tagged higher level alignment tokens following a
3030                     # '=>' deletion because the tags of higher level tokens
3031                     # will now be incorrect. For example, this will prevent
3032                     # aligning commas as follows after deleting the second '=>'
3033                     #    $w->insert(
3034                     #         ListBox => origin => [ 270, 160 ],
3035                     #         size    => [ 200,           55 ],
3036                     #    );
3037                     if ( defined($delete_above_level) ) {
3038                         if ( $lev > $delete_above_level ) {
3039                             $delete_me ||= 1;
3040                         }
3041                         else { $delete_above_level = undef }
3042                     }
3043
3044                     # EXCEPTION 3: Remove all but certain tokens after an
3045                     # assignment deletion.
3046                     if (
3047                         $deleted_assignment_token
3048                         && ( $lev > $group_level
3049                             || !$keep_after_deleted_assignment{$raw_tok} )
3050                       )
3051                     {
3052                         $delete_me ||= 1;
3053                     }
3054
3055                     # EXCEPTION 4: Do not touch the first line of a 2 line
3056                     # terminal match, such as below, because j_terminal has
3057                     # already been set.
3058                     #    if ($tag) { $tago = "<$tag>"; $tagc = "</$tag>"; }
3059                     #    else      { $tago = $tagc = ''; }
3060                     # But see snippets 'else1.t' and 'else2.t'
3061                     $delete_me = 0
3062                       if ( $jj == $jbeg
3063                         && $has_terminal_match
3064                         && $nlines == 2 );
3065
3066                     # EXCEPTION 5: misc additional rules for commas and equals
3067                     if ( $delete_me && $tok_count == 1 ) {
3068
3069                         # okay to delete second and higher copies of a token
3070
3071                         # for a comma...
3072                         if ( $raw_tok eq ',' ) {
3073
3074                             # Do not delete commas before an equals
3075                             $delete_me = 0
3076                               if ( defined($i_eq) && $i < $i_eq );
3077
3078                             # Do not delete line-level commas
3079                             $delete_me = 0 if ( $lev <= $group_level );
3080                         }
3081
3082                         # For an assignment at group level..
3083                         if (   $is_assignment{$raw_tok}
3084                             && $lev == $group_level )
3085                         {
3086
3087                             # Do not delete if it is the last alignment of
3088                             # multiple tokens; this will prevent some
3089                             # undesirable alignments
3090                             if ( $imax > 0 && $i == $imax ) {
3091                                 $delete_me = 0;
3092                             }
3093
3094                             # Otherwise, set a flag to delete most
3095                             # remaining tokens
3096                             else { $deleted_assignment_token = $raw_tok }
3097                         }
3098                     }
3099
3100                     # Do not let a user exclusion be reactivated by above rules
3101                     $delete_me ||= !$align_ok;
3102
3103                     #------------------------------------
3104                     # Add this token to the deletion list
3105                     #------------------------------------
3106                     if ($delete_me) {
3107                         push @idel, $i;
3108
3109                         # update deletion propagation flags
3110                         if ( !defined($delete_above_level)
3111                             || $lev < $delete_above_level )
3112                         {
3113
3114                             # delete all following higher level alignments
3115                             $delete_above_level = $lev;
3116
3117                             # but keep deleting after => to next lower level
3118                             # to avoid some bizarre alignments
3119                             if ( $raw_tok eq '=>' ) {
3120                                 $delete_above_level = $lev - 1;
3121                             }
3122                         }
3123                     }
3124                 }    # End loop over alignment tokens
3125
3126                 # Process all deletion requests for this line
3127                 if (@idel) {
3128                     delete_selected_tokens( $line, \@idel );
3129                 }
3130             }    # End loopover lines
3131         } ## end main loop over subgroups
3132
3133         return;
3134     } ## end sub delete_unmatched_tokens_main_loop
3135 }
3136
3137 sub match_line_pairs {
3138     my ( $rlines, $rnew_lines, $rsubgroups, $group_level ) = @_;
3139
3140     # Compare each pair of lines and save information about common matches
3141     # $rlines     = list of lines including hanging side comments
3142     # $rnew_lines = list of lines without any hanging side comments
3143     # $rsubgroups = list of subgroups of the new lines
3144
3145     # TODO:
3146     # Maybe change: imax_pair => pair_match_info = ref to array
3147     #  = [$imax_align, $rMsg, ... ]
3148     #  This may eventually have multi-level match info
3149
3150     # Previous line vars
3151     my ( $line_m, $rtokens_m, $rpatterns_m, $rfield_lengths_m, $imax_m,
3152         $list_type_m, $ci_level_m );
3153
3154     # Current line vars
3155     my ( $line, $rtokens, $rpatterns, $rfield_lengths, $imax, $list_type,
3156         $ci_level );
3157
3158     # loop over subgroups
3159     foreach my $item ( @{$rsubgroups} ) {
3160         my ( $jbeg, $jend ) = @{$item};
3161         my $nlines = $jend - $jbeg + 1;
3162         next unless ( $nlines > 1 );
3163
3164         # loop over lines in a subgroup
3165         foreach my $jj ( $jbeg .. $jend ) {
3166
3167             $line_m           = $line;
3168             $rtokens_m        = $rtokens;
3169             $rpatterns_m      = $rpatterns;
3170             $rfield_lengths_m = $rfield_lengths;
3171             $imax_m           = $imax;
3172             $list_type_m      = $list_type;
3173             $ci_level_m       = $ci_level;
3174
3175             $line           = $rnew_lines->[$jj];
3176             $rtokens        = $line->{'rtokens'};
3177             $rpatterns      = $line->{'rpatterns'};
3178             $rfield_lengths = $line->{'rfield_lengths'};
3179             $imax           = @{$rtokens} - 2;
3180             $list_type      = $line->{'list_type'};
3181             $ci_level       = $line->{'ci_level'};
3182
3183             # nothing to do for first line
3184             next if ( $jj == $jbeg );
3185
3186             my $ci_jump = $ci_level - $ci_level_m;
3187
3188             my $imax_min = $imax_m < $imax ? $imax_m : $imax;
3189
3190             my $imax_align = -1;
3191
3192             # find number of leading common tokens
3193
3194             #---------------------------------
3195             # No match to hanging side comment
3196             #---------------------------------
3197             if ( $line->{'is_hanging_side_comment'} ) {
3198
3199                 # Should not get here; HSC's have been filtered out
3200                 $imax_align = -1;
3201             }
3202
3203             #-----------------------------
3204             # Handle comma-separated lists
3205             #-----------------------------
3206             elsif ( $list_type && $list_type eq $list_type_m ) {
3207
3208                 # do not align lists across a ci jump with new list method
3209                 if ($ci_jump) { $imax_min = -1 }
3210
3211                 my $i_nomatch = $imax_min + 1;
3212                 foreach my $i ( 0 .. $imax_min ) {
3213                     my $tok   = $rtokens->[$i];
3214                     my $tok_m = $rtokens_m->[$i];
3215                     if ( $tok ne $tok_m ) {
3216                         $i_nomatch = $i;
3217                         last;
3218                     }
3219                 }
3220
3221                 $imax_align = $i_nomatch - 1;
3222             }
3223
3224             #-----------------
3225             # Handle non-lists
3226             #-----------------
3227             else {
3228                 my $i_nomatch = $imax_min + 1;
3229                 foreach my $i ( 0 .. $imax_min ) {
3230                     my $tok   = $rtokens->[$i];
3231                     my $tok_m = $rtokens_m->[$i];
3232                     if ( $tok ne $tok_m ) {
3233                         $i_nomatch = $i;
3234                         last;
3235                     }
3236
3237                     my $pat   = $rpatterns->[$i];
3238                     my $pat_m = $rpatterns_m->[$i];
3239
3240                     # If patterns don't match, we have to be careful...
3241                     if ( $pat_m ne $pat ) {
3242                         my $pad =
3243                           $rfield_lengths->[$i] - $rfield_lengths_m->[$i];
3244                         my ( $match_code, $rmsg ) =
3245                           compare_patterns( $group_level,
3246                             $tok, $tok_m, $pat, $pat_m, $pad );
3247                         if ($match_code) {
3248                             if    ( $match_code == 1 ) { $i_nomatch = $i }
3249                             elsif ( $match_code == 2 ) { $i_nomatch = 0 }
3250                             last;
3251                         }
3252                     }
3253                 }
3254                 $imax_align = $i_nomatch - 1;
3255             }
3256
3257             $line_m->{'imax_pair'} = $imax_align;
3258
3259         } ## end loop over lines
3260
3261         # Put fence at end of subgroup
3262         $line->{'imax_pair'} = -1;
3263
3264     } ## end loop over subgroups
3265
3266     # if there are hanging side comments, propagate the pair info down to them
3267     # so that lines can just look back one line for their pair info.
3268     if ( @{$rlines} > @{$rnew_lines} ) {
3269         my $last_pair_info = -1;
3270         foreach my $line ( @{$rlines} ) {
3271             if ( $line->{'is_hanging_side_comment'} ) {
3272                 $line->{'imax_pair'} = $last_pair_info;
3273             }
3274             else {
3275                 $last_pair_info = $line->{'imax_pair'};
3276             }
3277         }
3278     }
3279     return;
3280 } ## end sub match_line_pairs
3281
3282 sub compare_patterns {
3283
3284     my ( $group_level, $tok, $tok_m, $pat, $pat_m, $pad ) = @_;
3285
3286     # helper routine for sub match_line_pairs to decide if patterns in two
3287     # lines match well enough..Given
3288     #   $tok_m, $pat_m = token and pattern of first line
3289     #   $tok, $pat     = token and pattern of second line
3290     #   $pad           = 0 if no padding is needed, !=0 otherwise
3291     # return code:
3292     #   0 = patterns match, continue
3293     #   1 = no match
3294     #   2 = no match, and lines do not match at all
3295
3296     my $GoToMsg     = EMPTY_STRING;
3297     my $return_code = 0;
3298
3299     use constant EXPLAIN_COMPARE_PATTERNS => 0;
3300
3301     my ( $alignment_token, $lev, $tag, $tok_count ) =
3302       decode_alignment_token($tok);
3303
3304     # We have to be very careful about aligning commas
3305     # when the pattern's don't match, because it can be
3306     # worse to create an alignment where none is needed
3307     # than to omit one.  Here's an example where the ','s
3308     # are not in named containers.  The first line below
3309     # should not match the next two:
3310     #   ( $a, $b ) = ( $b, $r );
3311     #   ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
3312     #   ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
3313     if ( $alignment_token eq ',' ) {
3314
3315         # do not align commas unless they are in named
3316         # containers
3317         if ( $tok !~ /[A-Za-z]/ ) {
3318             $return_code = 1;
3319             $GoToMsg     = "do not align commas in unnamed containers";
3320         }
3321         else {
3322             $return_code = 0;
3323         }
3324     }
3325
3326     # do not align parens unless patterns match;
3327     # large ugly spaces can occur in math expressions.
3328     elsif ( $alignment_token eq '(' ) {
3329
3330         # But we can allow a match if the parens don't
3331         # require any padding.
3332         if ( $pad != 0 ) {
3333             $return_code = 1;
3334             $GoToMsg     = "do not align '(' unless patterns match or pad=0";
3335         }
3336         else {
3337             $return_code = 0;
3338         }
3339     }
3340
3341     # Handle an '=' alignment with different patterns to
3342     # the left.
3343     elsif ( $alignment_token eq '=' ) {
3344
3345         # It is best to be a little restrictive when
3346         # aligning '=' tokens.  Here is an example of
3347         # two lines that we will not align:
3348         #       my $variable=6;
3349         #       $bb=4;
3350         # The problem is that one is a 'my' declaration,
3351         # and the other isn't, so they're not very similar.
3352         # We will filter these out by comparing the first
3353         # letter of the pattern.  This is crude, but works
3354         # well enough.
3355         if ( substr( $pat_m, 0, 1 ) ne substr( $pat, 0, 1 ) ) {
3356             $GoToMsg     = "first character before equals differ";
3357             $return_code = 1;
3358         }
3359
3360         # The introduction of sub 'prune_alignment_tree'
3361         # enabled alignment of lists left of the equals with
3362         # other scalar variables. For example:
3363         # my ( $D, $s, $e ) = @_;
3364         # my $d             = length $D;
3365         # my $c             = $e - $s - $d;
3366
3367         # But this would change formatting of a lot of scripts,
3368         # so for now we prevent alignment of comma lists on the
3369         # left with scalars on the left.  We will also prevent
3370         # any partial alignments.
3371
3372         # set return code 2 if the = is at line level, but
3373         # set return code 1 if the = is below line level, i.e.
3374         #  sub new { my ( $p, $v ) = @_; bless \$v, $p }
3375         #  sub iter { my ($x) = @_; return undef if $$x < 0; return $$x--; }
3376
3377         elsif ( ( index( $pat_m, ',' ) >= 0 ) ne ( index( $pat, ',' ) >= 0 ) ) {
3378             $GoToMsg     = "mixed commas/no-commas before equals";
3379             $return_code = 1;
3380             if ( $lev eq $group_level ) {
3381                 $return_code = 2;
3382             }
3383         }
3384         else {
3385             $return_code = 0;
3386         }
3387     }
3388     else {
3389         $return_code = 0;
3390     }
3391
3392     EXPLAIN_COMPARE_PATTERNS
3393       && $return_code
3394       && print STDERR "no match because $GoToMsg\n";
3395
3396     return ( $return_code, \$GoToMsg );
3397
3398 } ## end sub compare_patterns
3399
3400 sub fat_comma_to_comma {
3401     my ($str) = @_;
3402
3403     # We are changing '=>' to ',' and removing any trailing decimal count
3404     # because currently fat commas have a count and commas do not.
3405     # For example, we will change '=>2+{-3.2' into ',2+{-3'
3406     if ( $str =~ /^=>([^\.]*)/ ) { $str = ',' . $1 }
3407     return $str;
3408 } ## end sub fat_comma_to_comma
3409
3410 sub get_line_token_info {
3411
3412     # scan lines of tokens and return summary information about the range of
3413     # levels and patterns.
3414     my ($rlines) = @_;
3415
3416     # First scan to check monotonicity. Here is an example of several
3417     # lines which are monotonic. The = is the lowest level, and
3418     # the commas are all one level deeper. So this is not nonmonotonic.
3419     #  $$d{"weeks"}   = [ "w",  "wk",  "wks", "week", "weeks" ];
3420     #  $$d{"days"}    = [ "d",  "day", "days" ];
3421     #  $$d{"hours"}   = [ "h",  "hr",  "hrs", "hour", "hours" ];
3422     my @all_token_info;
3423     my $all_monotonic = 1;
3424     foreach my $jj ( 0 .. @{$rlines} - 1 ) {
3425         my ($line) = $rlines->[$jj];
3426         my $rtokens = $line->{'rtokens'};
3427         my $last_lev;
3428         my $is_monotonic = 1;
3429         my $i            = -1;
3430         foreach my $tok ( @{$rtokens} ) {
3431             $i++;
3432             my ( $raw_tok, $lev, $tag, $tok_count ) =
3433               decode_alignment_token($tok);
3434             push @{ $all_token_info[$jj] },
3435               [ $raw_tok, $lev, $tag, $tok_count ];
3436             last if ( $tok eq '#' );
3437             if ( $i > 0 && $lev < $last_lev ) { $is_monotonic = 0 }
3438             $last_lev = $lev;
3439         }
3440         if ( !$is_monotonic ) { $all_monotonic = 0 }
3441     }
3442
3443     my $rline_values = [];
3444     foreach my $jj ( 0 .. @{$rlines} - 1 ) {
3445         my ($line) = $rlines->[$jj];
3446
3447         my $rtokens = $line->{'rtokens'};
3448         my $i       = -1;
3449         my ( $lev_min, $lev_max );
3450         my $token_pattern_max = EMPTY_STRING;
3451         my %saw_level;
3452         my $is_monotonic = 1;
3453
3454         # find the index of the last token before the side comment
3455         my $imax      = @{$rtokens} - 2;
3456         my $imax_true = $imax;
3457
3458         # If the entire group is monotonic, and the line ends in a comma list,
3459         # walk it back to the first such comma. this will have the effect of
3460         # making all trailing ragged comma lists match in the prune tree
3461         # routine.  these trailing comma lists can better be handled by later
3462         # alignment rules.
3463
3464         # Treat fat commas the same as commas here by converting them to
3465         # commas.  This will improve the chance of aligning the leading parts
3466         # of ragged lists.
3467
3468         my $tok_end = fat_comma_to_comma( $rtokens->[$imax] );
3469         if ( $all_monotonic && $tok_end =~ /^,/ ) {
3470             my $ii = $imax - 1;
3471             while ( $ii >= 0
3472                 && fat_comma_to_comma( $rtokens->[$ii] ) eq $tok_end )
3473             {
3474                 $imax = $ii;
3475                 $ii--;
3476             }
3477         }
3478
3479         # make a first pass to find level range
3480         my $last_lev;
3481         foreach my $tok ( @{$rtokens} ) {
3482             $i++;
3483             last if ( $i > $imax );
3484             last if ( $tok eq '#' );
3485             my ( $raw_tok, $lev, $tag, $tok_count ) =
3486               @{ $all_token_info[$jj]->[$i] };
3487
3488             last if ( $tok eq '#' );
3489             $token_pattern_max .= $tok;
3490             $saw_level{$lev}++;
3491             if ( !defined($lev_min) ) {
3492                 $lev_min = $lev;
3493                 $lev_max = $lev;
3494             }
3495             else {
3496                 if ( $lev < $lev_min )  { $lev_min      = $lev; }
3497                 if ( $lev > $lev_max )  { $lev_max      = $lev; }
3498                 if ( $lev < $last_lev ) { $is_monotonic = 0 }
3499             }
3500             $last_lev = $lev;
3501         }
3502
3503         # handle no levels
3504         my $rtoken_patterns = {};
3505         my $rtoken_indexes  = {};
3506         my @levs            = sort keys %saw_level;
3507         if ( !defined($lev_min) ) {
3508             $lev_min                     = -1;
3509             $lev_max                     = -1;
3510             $levs[0]                     = -1;
3511             $rtoken_patterns->{$lev_min} = EMPTY_STRING;
3512             $rtoken_indexes->{$lev_min}  = [];
3513         }
3514
3515         # handle one level
3516         elsif ( $lev_max == $lev_min ) {
3517             $rtoken_patterns->{$lev_max} = $token_pattern_max;
3518             $rtoken_indexes->{$lev_max}  = [ ( 0 .. $imax ) ];
3519         }
3520
3521         # handle multiple levels
3522         else {
3523             $rtoken_patterns->{$lev_max} = $token_pattern_max;
3524             $rtoken_indexes->{$lev_max}  = [ ( 0 .. $imax ) ];
3525
3526             my $lev_top = pop @levs;    # alread did max level
3527             my $itok    = -1;
3528             foreach my $tok ( @{$rtokens} ) {
3529                 $itok++;
3530                 last if ( $itok > $imax );
3531                 my ( $raw_tok, $lev, $tag, $tok_count ) =
3532                   @{ $all_token_info[$jj]->[$itok] };
3533                 last if ( $raw_tok eq '#' );
3534                 foreach my $lev_test (@levs) {
3535                     next if ( $lev > $lev_test );
3536                     $rtoken_patterns->{$lev_test} .= $tok;
3537                     push @{ $rtoken_indexes->{$lev_test} }, $itok;
3538                 }
3539             }
3540             push @levs, $lev_top;
3541         }
3542
3543         push @{$rline_values},
3544           [
3545             $lev_min,        $lev_max,      $rtoken_patterns, \@levs,
3546             $rtoken_indexes, $is_monotonic, $imax_true,       $imax,
3547           ];
3548
3549         # debug
3550         0 && do {
3551             local $LIST_SEPARATOR = ')(';
3552             print "lev_min=$lev_min, lev_max=$lev_max, levels=(@levs)\n";
3553             foreach my $key ( sort keys %{$rtoken_patterns} ) {
3554                 print "$key => $rtoken_patterns->{$key}\n";
3555                 print "$key => @{$rtoken_indexes->{$key}}\n";
3556             }
3557         };
3558     } ## end loop over lines
3559     return ( $rline_values, $all_monotonic );
3560 } ## end sub get_line_token_info
3561
3562 sub prune_alignment_tree {
3563     my ($rlines) = @_;
3564     my $jmax = @{$rlines} - 1;
3565     return unless $jmax > 0;
3566
3567     # Vertical alignment in perltidy is done as an iterative process.  The
3568     # starting point is to mark all possible alignment tokens ('=', ',', '=>',
3569     # etc) for vertical alignment.  Then we have to delete all alignments
3570     # which, if actually made, would detract from overall alignment.  This
3571     # is done in several phases of which this is one.
3572
3573     # In this routine we look at the alignments of a group of lines as a
3574     # hierarchical tree.  We will 'prune' the tree to limited depths if that
3575     # will improve overall alignment at the lower depths.
3576     # For each line we will be looking at its alignment patterns down to
3577     # different fixed depths. For each depth, we include all lower depths and
3578     # ignore all higher depths.  We want to see if we can get alignment of a
3579     # larger group of lines if we ignore alignments at some lower depth.
3580     # Here is an # example:
3581
3582     # for (
3583     #     [ '$var',     sub { join $_, "bar" },            0, "bar" ],
3584     #     [ 'CONSTANT', sub { join "foo", "bar" },         0, "bar" ],
3585     #     [ 'CONSTANT', sub { join "foo", "bar", 3 },      1, "barfoo3" ],
3586     #     [ '$myvar',   sub { my $var; join $var, "bar" }, 0, "bar" ],
3587     # );
3588
3589     # In the above example, all lines have three commas at the lowest depth
3590     # (zero), so if there were no other alignments, these lines would all
3591     # align considering only the zero depth alignment token.  But some lines
3592     # have additional comma alignments at the next depth, so we need to decide
3593     # if we should drop those to keep the top level alignments, or keep those
3594     # for some additional low level alignments at the expense losing some top
3595     # level alignments.  In this case we will drop the deeper level commas to
3596     # keep the entire collection aligned.  But in some cases the decision could
3597     # go the other way.
3598
3599     # The tree for this example at the zero depth has one node containing
3600     # all four lines, since they are identical at zero level (three commas).
3601     # At depth one, there are three 'children' nodes, namely:
3602     # - lines 1 and 2, which have a single comma in the 'sub' at depth 1
3603     # - line 3, which has 2 commas at depth 1
3604     # - line4, which has a ';' and a ',' at depth 1
3605     # There are no deeper alignments in this example.
3606     # so the tree structure for this example is:
3607     #
3608     #    depth 0         depth 1      depth 2
3609     #    [lines 1-4] --  [line 1-2] -  (empty)
3610     #                 |  [line 3]   -  (empty)
3611     #                 |  [line 4]   -  (empty)
3612
3613     # We can carry this to any depth, but it is not really useful to go below
3614     # depth 2. To cleanly stop there, we will consider depth 2 to contain all
3615     # alignments at depth >=2.
3616
3617     use constant EXPLAIN_PRUNE => 0;
3618
3619     #-------------------------------------------------------------------
3620     # Prune Tree Step 1. Start by scanning the lines and collecting info
3621     #-------------------------------------------------------------------
3622
3623     # Note that the caller had this info but we have to redo this now because
3624     # alignment tokens may have been deleted.
3625     my ( $rline_values, $all_monotonic ) = get_line_token_info($rlines);
3626
3627     # If all the lines have levels which increase monotonically from left to
3628     # right, then the sweep-left-to-right pass can do a better job of alignment
3629     # than pruning, and without deleting alignments.
3630     return if ($all_monotonic);
3631
3632     # Contents of $rline_values
3633     #   [
3634     #     $lev_min,        $lev_max,      $rtoken_patterns, \@levs,
3635     #     $rtoken_indexes, $is_monotonic, $imax_true,       $imax,
3636     #   ];
3637
3638     # We can work to any depth, but there is little advantage to working
3639     # to a a depth greater than 2
3640     my $MAX_DEPTH = 2;
3641
3642     # This arrays will hold the tree of alignment tokens at different depths
3643     # for these lines.
3644     my @match_tree;
3645
3646     # Tree nodes contain these values:
3647     # $match_tree[$depth] = [$jbeg, $jend, $n_parent, $level, $pattern,
3648     #                        $nc_beg_p, $nc_end_p, $rindexes];
3649     # where
3650     #      $depth = 0,1,2 = index of depth of the match
3651
3652     #  $jbeg beginning index j of the range of lines in this match
3653     #  $jend ending index j of the range of lines in this match
3654     #  $n_parent = index of the containing group at $depth-1, if it exists
3655     #  $level = actual level of code being matched in this group
3656     #  $pattern = alignment pattern being matched
3657     #  $nc_beg_p = first child
3658     #  $nc_end_p = last child
3659     #  $rindexes = ref to token indexes
3660
3661     # the patterns and levels of the current group being formed at each depth
3662     my ( @token_patterns_current, @levels_current, @token_indexes_current );
3663
3664     # the patterns and levels of the next line being tested at each depth
3665     my ( @token_patterns_next, @levels_next, @token_indexes_next );
3666
3667     #-----------------------------------------------------------
3668     # define a recursive worker subroutine for tree construction
3669     #-----------------------------------------------------------
3670
3671     # This is a recursive routine which is called if a match condition changes
3672     # at any depth when a new line is encountered.  It ends the match node
3673     # which changed plus all deeper nodes attached to it.
3674     my $end_node;
3675     $end_node = sub {
3676         my ( $depth, $jl, $n_parent ) = @_;
3677
3678         # $depth is the tree depth
3679         # $jl is the  index of the line
3680         # $n_parent is index of the parent node of this node
3681
3682         return if ( $depth > $MAX_DEPTH );
3683
3684         # end any current group at this depth
3685         if (   $jl >= 0
3686             && defined( $match_tree[$depth] )
3687             && @{ $match_tree[$depth] }
3688             && defined( $levels_current[$depth] ) )
3689         {
3690             $match_tree[$depth]->[-1]->[1] = $jl;
3691         }
3692
3693         # Define the index of the node we will create below
3694         my $ng_self = 0;
3695         if ( defined( $match_tree[$depth] ) ) {
3696             $ng_self = @{ $match_tree[$depth] };
3697         }
3698
3699         # end any next deeper child node(s)
3700         $end_node->( $depth + 1, $jl, $ng_self );
3701
3702         # update the levels being matched
3703         $token_patterns_current[$depth] = $token_patterns_next[$depth];
3704         $token_indexes_current[$depth]  = $token_indexes_next[$depth];
3705         $levels_current[$depth]         = $levels_next[$depth];
3706
3707         # Do not start a new group at this level if it is not being used
3708         if ( !defined( $levels_next[$depth] )
3709             || $depth > 0
3710             && $levels_next[$depth] <= $levels_next[ $depth - 1 ] )
3711         {
3712             return;
3713         }
3714
3715         # Create a node for the next group at this depth. We initially assume
3716         # that it will continue to $jmax, and correct that later if the node
3717         # ends earlier.
3718         push @{ $match_tree[$depth] },
3719           [
3720             $jl + 1, $jmax, $n_parent, $levels_current[$depth],
3721             $token_patterns_current[$depth],
3722             undef, undef, $token_indexes_current[$depth],
3723           ];
3724
3725         return;
3726     };    ## end sub end_node
3727
3728     #-----------------------------------------------------
3729     # Prune Tree Step 2. Loop to form the tree of matches.
3730     #-----------------------------------------------------
3731     foreach my $jp ( 0 .. $jmax ) {
3732
3733         # working with two adjacent line indexes, 'm'=minus, 'p'=plus
3734         my $jm = $jp - 1;
3735
3736         # Pull out needed values for the next line
3737         my ( $lev_min, $lev_max, $rtoken_patterns, $rlevs, $rtoken_indexes,
3738             $is_monotonic, $imax_true, $imax )
3739           = @{ $rline_values->[$jp] };
3740
3741         # Transfer levels and patterns for this line to the working arrays.
3742         # If the number of levels differs from our chosen MAX_DEPTH ...
3743         # if fewer than MAX_DEPTH: leave levels at missing depths undefined
3744         # if more than MAX_DEPTH: set the MAX_DEPTH level to be the maximum
3745         @levels_next = @{$rlevs}[ 0 .. $MAX_DEPTH ];
3746         if ( @{$rlevs} > $MAX_DEPTH ) {
3747             $levels_next[$MAX_DEPTH] = $rlevs->[-1];
3748         }
3749         my $depth = 0;
3750         foreach my $item (@levels_next) {
3751             $token_patterns_next[$depth] =
3752               defined($item) ? $rtoken_patterns->{$item} : undef;
3753             $token_indexes_next[$depth] =
3754               defined($item) ? $rtoken_indexes->{$item} : undef;
3755             $depth++;
3756         }
3757
3758         # Look for a change in match groups...
3759
3760         # Initialize on the first line
3761         if ( $jp == 0 ) {
3762             my $n_parent;
3763             $end_node->( 0, $jm, $n_parent );
3764         }
3765
3766         # End groups if a hard flag has been set
3767         elsif ( $rlines->[$jm]->{'end_group'} ) {
3768             my $n_parent;
3769             $end_node->( 0, $jm, $n_parent );
3770         }
3771
3772         # Continue at hanging side comment
3773         elsif ( $rlines->[$jp]->{'is_hanging_side_comment'} ) {
3774             next;
3775         }
3776
3777         # Otherwise see if anything changed and update the tree if so
3778         else {
3779             foreach my $depth ( 0 .. $MAX_DEPTH ) {
3780
3781                 my $def_current = defined( $token_patterns_current[$depth] );
3782                 my $def_next    = defined( $token_patterns_next[$depth] );
3783                 last unless ( $def_current || $def_next );
3784                 if (   !$def_current
3785                     || !$def_next
3786                     || $token_patterns_current[$depth] ne
3787                     $token_patterns_next[$depth] )
3788                 {
3789                     my $n_parent;
3790                     if ( $depth > 0 && defined( $match_tree[ $depth - 1 ] ) ) {
3791                         $n_parent = @{ $match_tree[ $depth - 1 ] } - 1;
3792                     }
3793                     $end_node->( $depth, $jm, $n_parent );
3794                     last;
3795                 }
3796             }
3797         }
3798     } ## end loop to form tree of matches
3799
3800     #---------------------------------------------------------
3801     # Prune Tree Step 3. Make links from parent to child nodes
3802     #---------------------------------------------------------
3803
3804     # It seemed cleaner to do this as a separate step rather than during tree
3805     # construction.  The children nodes have links up to the parent node which
3806     # created them.  Now make links in the opposite direction, so the parents
3807     # can find the children.  We store the range of children nodes ($nc_beg,
3808     # $nc_end) of each parent with two additional indexes in the original array.
3809     # These will be undef if no children.
3810     foreach my $depth ( reverse( 1 .. $MAX_DEPTH ) ) {
3811         next unless defined( $match_tree[$depth] );
3812         my $nc_max = @{ $match_tree[$depth] } - 1;
3813         my $np_now;
3814         foreach my $nc ( 0 .. $nc_max ) {
3815             my $np = $match_tree[$depth]->[$nc]->[2];
3816             if ( !defined($np) ) {
3817
3818                 # shouldn't happen
3819                 #print STDERR "lost child $np at depth $depth\n";
3820                 next;
3821             }
3822             if ( !defined($np_now) || $np != $np_now ) {
3823                 $np_now = $np;
3824                 $match_tree[ $depth - 1 ]->[$np]->[5] = $nc;
3825             }
3826             $match_tree[ $depth - 1 ]->[$np]->[6] = $nc;
3827         }
3828     } ## end loop to make links down to the child nodes
3829
3830     EXPLAIN_PRUNE > 0 && do {
3831         print "Tree complete. Found these groups:\n";
3832         foreach my $depth ( 0 .. $MAX_DEPTH ) {
3833             Dump_tree_groups( \@{ $match_tree[$depth] }, "depth=$depth" );
3834         }
3835     };
3836
3837     #------------------------------------------------------
3838     # Prune Tree Step 4. Make a list of nodes to be deleted
3839     #------------------------------------------------------
3840
3841     #  list of lines with tokens to be deleted:
3842     #  [$jbeg, $jend, $level_keep]
3843     #  $jbeg..$jend is the range of line indexes,
3844     #  $level_keep is the minimum level to keep
3845     my @delete_list;
3846
3847     # Not currently used:
3848     #  Groups with ending comma lists and their range of sizes:
3849     #  $ragged_comma_group{$id} = [ imax_group_min, imax_group_max ]
3850     ## my %ragged_comma_group;
3851
3852     # We work with a list of nodes to visit at the next deeper depth.
3853     my @todo_list;
3854     if ( defined( $match_tree[0] ) ) {
3855         @todo_list = ( 0 .. @{ $match_tree[0] } - 1 );
3856     }
3857
3858     foreach my $depth ( 0 .. $MAX_DEPTH ) {
3859         last unless (@todo_list);
3860         my @todo_next;
3861         foreach my $np (@todo_list) {
3862             my ( $jbeg_p, $jend_p, $np_p, $lev_p, $pat_p, $nc_beg_p, $nc_end_p,
3863                 $rindexes_p )
3864               = @{ $match_tree[$depth]->[$np] };
3865             my $nlines_p = $jend_p - $jbeg_p + 1;
3866
3867             # nothing to do if no children
3868             next unless defined($nc_beg_p);
3869
3870             # Define the number of lines to either keep or delete a child node.
3871             # This is the key decision we have to make.  We want to delete
3872             # short runs of matched lines, and keep long runs.  It seems easier
3873             # for the eye to follow breaks in monotonic level changes than
3874             # non-monotonic level changes.  For example, the following looks
3875             # best if we delete the lower level alignments:
3876
3877             #  [1]                  ~~ [];
3878             #  [ ["foo"], ["bar"] ] ~~ [ qr/o/, qr/a/ ];
3879             #  [ qr/o/, qr/a/ ]     ~~ [ ["foo"], ["bar"] ];
3880             #  [ "foo", "bar" ]     ~~ [ qr/o/, qr/a/ ];
3881             #  [ qr/o/, qr/a/ ]     ~~ [ "foo", "bar" ];
3882             #  $deep1               ~~ $deep1;
3883
3884             # So we will use two thresholds.
3885             my $nmin_mono     = $depth + 2;
3886             my $nmin_non_mono = $depth + 6;
3887             if ( $nmin_mono > $nlines_p - 1 ) {
3888                 $nmin_mono = $nlines_p - 1;
3889             }
3890             if ( $nmin_non_mono > $nlines_p - 1 ) {
3891                 $nmin_non_mono = $nlines_p - 1;
3892             }
3893
3894             # loop to keep or delete each child node
3895             foreach my $nc ( $nc_beg_p .. $nc_end_p ) {
3896                 my ( $jbeg_c, $jend_c, $np_c, $lev_c, $pat_c, $nc_beg_c,
3897                     $nc_end_c )
3898                   = @{ $match_tree[ $depth + 1 ]->[$nc] };
3899                 my $nlines_c     = $jend_c - $jbeg_c + 1;
3900                 my $is_monotonic = $rline_values->[$jbeg_c]->[5];
3901                 my $nmin         = $is_monotonic ? $nmin_mono : $nmin_non_mono;
3902                 if ( $nlines_c < $nmin ) {
3903 ##print "deleting child, nlines=$nlines_c, nmin=$nmin\n";
3904                     push @delete_list, [ $jbeg_c, $jend_c, $lev_p ];
3905                 }
3906                 else {
3907 ##print "keeping child, nlines=$nlines_c, nmin=$nmin\n";
3908                     push @todo_next, $nc;
3909                 }
3910             }
3911         }
3912         @todo_list = @todo_next;
3913     } ## end loop to mark nodes to delete
3914
3915     #------------------------------------------------------------
3916     # Prune Tree Step 5. Loop to delete selected alignment tokens
3917     #------------------------------------------------------------
3918     foreach my $item (@delete_list) {
3919         my ( $jbeg, $jend, $level_keep ) = @{$item};
3920         foreach my $jj ( $jbeg .. $jend ) {
3921             my $line = $rlines->[$jj];
3922             my @idel;
3923             my $rtokens = $line->{'rtokens'};
3924             my $imax    = @{$rtokens} - 2;
3925             foreach my $i ( 0 .. $imax ) {
3926                 my $tok = $rtokens->[$i];
3927                 my ( $raw_tok, $lev, $tag, $tok_count ) =
3928                   decode_alignment_token($tok);
3929                 if ( $lev > $level_keep ) {
3930                     push @idel, $i;
3931                 }
3932             }
3933             if (@idel) {
3934                 delete_selected_tokens( $line, \@idel );
3935             }
3936         }
3937     } ## end loop to delete selected alignment tokens
3938
3939     return;
3940 } ## end sub prune_alignment_tree
3941
3942 sub Dump_tree_groups {
3943     my ( $rgroup, $msg ) = @_;
3944
3945     # Debug routine
3946     print "$msg\n";
3947     local $LIST_SEPARATOR = ')(';
3948     foreach my $item ( @{$rgroup} ) {
3949         my @fix = @{$item};
3950         foreach my $val (@fix) { $val = "undef" unless defined $val; }
3951         $fix[4] = "...";
3952         print "(@fix)\n";
3953     }
3954     return;
3955 } ## end sub Dump_tree_groups
3956
3957 {    ## closure for sub is_marginal_match
3958
3959     my %is_if_or;
3960     my %is_assignment;
3961     my %is_good_alignment;
3962
3963     # This test did not give sufficiently better results to use as an update,
3964     # but the flag is worth keeping as a starting point for future testing.
3965     use constant TEST_MARGINAL_EQ_ALIGNMENT => 0;
3966
3967     BEGIN {
3968
3969         my @q = qw(
3970           if unless or ||
3971         );
3972         @is_if_or{@q} = (1) x scalar(@q);
3973
3974         @q = qw(
3975           = **= += *= &= <<= &&=
3976           -= /= |= >>= ||= //=
3977           .= %= ^=
3978           x=
3979         );
3980         @is_assignment{@q} = (1) x scalar(@q);
3981
3982         # Vertically aligning on certain "good" tokens is usually okay
3983         # so we can be less restrictive in marginal cases.
3984         @q = qw( { ? => = );
3985         push @q, (',');
3986         @is_good_alignment{@q} = (1) x scalar(@q);
3987     } ## end BEGIN
3988
3989     sub is_marginal_match {
3990
3991         my ( $line_0, $line_1, $group_level, $imax_align, $imax_prev ) = @_;
3992
3993         # Decide if we should undo some or all of the common alignments of a
3994         # group of just two lines.
3995
3996         # Given:
3997         #   $line_0 and $line_1 - the two lines
3998         #   $group_level = the indentation level of the group being processed
3999         #   $imax_align = the maximum index of the common alignment tokens
4000         #                 of the two lines
4001         #   $imax_prev  = the maximum index of the common alignment tokens
4002         #                 with the line before $line_0 (=-1 of does not exist)
4003
4004         # Return:
4005         #   $is_marginal = true if the two lines should NOT be fully aligned
4006         #                = false if the two lines can remain fully aligned
4007         #   $imax_align  = the index of the highest alignment token shared by
4008         #                  these two lines to keep if the match is marginal.
4009
4010         # When we have an alignment group of just two lines like this, we are
4011         # working in the twilight zone of what looks good and what looks bad.
4012         # This routine is a collection of rules which work have been found to
4013         # work fairly well, but it will need to be updated from time to time.
4014
4015         my $is_marginal = 0;
4016
4017         #---------------------------------------
4018         # Always align certain special cases ...
4019         #---------------------------------------
4020         if (
4021
4022             # always keep alignments of a terminal else or ternary
4023             defined( $line_1->{'j_terminal_match'} )
4024
4025             # always align lists
4026             || $line_0->{'list_type'}
4027
4028             # always align hanging side comments
4029             || $line_1->{'is_hanging_side_comment'}
4030
4031           )
4032         {
4033             return ( $is_marginal, $imax_align );
4034         }
4035
4036         my $jmax_0           = $line_0->{'jmax'};
4037         my $jmax_1           = $line_1->{'jmax'};
4038         my $rtokens_1        = $line_1->{'rtokens'};
4039         my $rtokens_0        = $line_0->{'rtokens'};
4040         my $rfield_lengths_0 = $line_0->{'rfield_lengths'};
4041         my $rfield_lengths_1 = $line_1->{'rfield_lengths'};
4042         my $rpatterns_0      = $line_0->{'rpatterns'};
4043         my $rpatterns_1      = $line_1->{'rpatterns'};
4044         my $imax_next        = $line_1->{'imax_pair'};
4045
4046         # We will scan the alignment tokens and set a flag '$is_marginal' if
4047         # it seems that the an alignment would look bad.
4048         my $max_pad            = 0;
4049         my $saw_good_alignment = 0;
4050         my $saw_if_or;                # if we saw an 'if' or 'or' at group level
4051         my $raw_tokb = EMPTY_STRING;  # first token seen at group level
4052         my $jfirst_bad;
4053         my $line_ending_fat_comma;    # is last token just a '=>' ?
4054         my $j0_eq_pad;
4055         my $j0_max_pad = 0;
4056
4057         foreach my $j ( 0 .. $jmax_1 - 2 ) {
4058             my ( $raw_tok, $lev, $tag, $tok_count ) =
4059               decode_alignment_token( $rtokens_1->[$j] );
4060             if ( $raw_tok && $lev == $group_level ) {
4061                 if ( !$raw_tokb ) { $raw_tokb = $raw_tok }
4062                 $saw_if_or ||= $is_if_or{$raw_tok};
4063             }
4064
4065             # When the first of the two lines ends in a bare '=>' this will
4066             # probably be marginal match.  (For a bare =>, the next field length
4067             # will be 2 or 3, depending on side comment)
4068             $line_ending_fat_comma =
4069                  $j == $jmax_1 - 2
4070               && $raw_tok eq '=>'
4071               && $rfield_lengths_0->[ $j + 1 ] <= 3;
4072
4073             my $pad = $rfield_lengths_1->[$j] - $rfield_lengths_0->[$j];
4074             if ( $j == 0 ) {
4075                 $pad += $line_1->{'leading_space_count'} -
4076                   $line_0->{'leading_space_count'};
4077
4078                 # Remember the pad at a leading equals
4079                 if ( $raw_tok eq '=' && $lev == $group_level ) {
4080                     $j0_eq_pad = $pad;
4081                     $j0_max_pad =
4082                       0.5 * ( $rfield_lengths_1->[0] + $rfield_lengths_0->[0] );
4083                     $j0_max_pad = 4 if ( $j0_max_pad < 4 );
4084                 }
4085             }
4086
4087             if ( $pad < 0 )        { $pad     = -$pad }
4088             if ( $pad > $max_pad ) { $max_pad = $pad }
4089             if ( $is_good_alignment{$raw_tok} && !$line_ending_fat_comma ) {
4090                 $saw_good_alignment = 1;
4091             }
4092             else {
4093                 $jfirst_bad = $j unless defined($jfirst_bad);
4094             }
4095             if ( $rpatterns_0->[$j] ne $rpatterns_1->[$j] ) {
4096
4097                 # Flag this as a marginal match since patterns differ.
4098                 # Normally, we will not allow just two lines to match if
4099                 # marginal. But we can allow matching in some specific cases.
4100
4101                 $jfirst_bad  = $j if ( !defined($jfirst_bad) );
4102                 $is_marginal = 1  if ( $is_marginal == 0 );
4103                 if ( $raw_tok eq '=' ) {
4104
4105                     # Here is an example of a marginal match:
4106                     #       $done{$$op} = 1;
4107                     #       $op         = compile_bblock($op);
4108                     # The left tokens are both identifiers, but
4109                     # one accesses a hash and the other doesn't.
4110                     # We'll let this be a tentative match and undo
4111                     # it later if we don't find more than 2 lines
4112                     # in the group.
4113                     $is_marginal = 2;
4114                 }
4115             }
4116         }
4117
4118         $is_marginal = 1 if ( $is_marginal == 0 && $line_ending_fat_comma );
4119
4120         # Turn off the "marginal match" flag in some cases...
4121         # A "marginal match" occurs when the alignment tokens agree
4122         # but there are differences in the other tokens (patterns).
4123         # If we leave the marginal match flag set, then the rule is that we
4124         # will align only if there are more than two lines in the group.
4125         # We will turn of the flag if we almost have a match
4126         # and either we have seen a good alignment token or we
4127         # just need a small pad (2 spaces) to fit.  These rules are
4128         # the result of experimentation.  Tokens which misaligned by just
4129         # one or two characters are annoying.  On the other hand,
4130         # large gaps to less important alignment tokens are also annoying.
4131         if ( $is_marginal == 1
4132             && ( $saw_good_alignment || $max_pad < 3 ) )
4133         {
4134             $is_marginal = 0;
4135         }
4136
4137         # We will use the line endings to help decide on alignments...
4138         # See if the lines end with semicolons...
4139         my $sc_term0;
4140         my $sc_term1;
4141         if ( $jmax_0 < 1 || $jmax_1 < 1 ) {
4142
4143             # shouldn't happen
4144         }
4145         else {
4146             my $pat0 = $rpatterns_0->[ $jmax_0 - 1 ];
4147             my $pat1 = $rpatterns_1->[ $jmax_1 - 1 ];
4148             $sc_term0 = $pat0 =~ /;b?$/;
4149             $sc_term1 = $pat1 =~ /;b?$/;
4150         }
4151
4152         if ( !$is_marginal && !$sc_term0 ) {
4153
4154             # First line of assignment should be semicolon terminated.
4155             # For example, do not align here:
4156             #  $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
4157             #    $$href{-NUM_DIRS} = 0;
4158             if ( $is_assignment{$raw_tokb} ) {
4159                 $is_marginal = 1;
4160             }
4161         }
4162
4163         # Try to avoid some undesirable alignments of opening tokens
4164         # for example, the space between grep and { here:
4165         #  return map { ( $_ => $_ ) }
4166         #    grep     { /$handles/ } $self->_get_delegate_method_list;
4167         $is_marginal ||=
4168              ( $raw_tokb eq '(' || $raw_tokb eq '{' )
4169           && $jmax_1 == 2
4170           && $sc_term0 ne $sc_term1;
4171
4172         #---------------------------------------
4173         # return if this is not a marginal match
4174         #---------------------------------------
4175         if ( !$is_marginal ) {
4176             return ( $is_marginal, $imax_align );
4177         }
4178
4179         # Undo the marginal match flag in certain cases,
4180
4181         # Two lines with a leading equals-like operator are allowed to
4182         # align if the patterns to the left of the equals are the same.
4183         # For example the following two lines are a marginal match but have
4184         # the same left side patterns, so we will align the equals.
4185         #     my $orig = my $format = "^<<<<< ~~\n";
4186         #     my $abc  = "abc";
4187         # But these have a different left pattern so they will not be
4188         # aligned
4189         #     $xmldoc .= $`;
4190         #     $self->{'leftovers'} .= "<bx-seq:seq" . $';
4191
4192         # First line semicolon terminated but second not, usually ok:
4193         #               my $want = "'ab', 'a', 'b'";
4194         #               my $got  = join( ", ",
4195         #                    map { defined($_) ? "'$_'" : "undef" }
4196         #                          @got );
4197         #  First line not semicolon terminated, Not OK to match:
4198         #   $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
4199         #      $$href{-NUM_DIRS} = 0;
4200         my $pat0 = $rpatterns_0->[0];
4201         my $pat1 = $rpatterns_1->[0];
4202
4203         #---------------------------------------------------------
4204         # Turn off the marginal flag for some types of assignments
4205         #---------------------------------------------------------
4206         if ( $is_assignment{$raw_tokb} ) {
4207
4208             # undo marginal flag if first line is semicolon terminated
4209             # and leading patters match
4210             if ($sc_term0) {    # && $sc_term1) {
4211                 $is_marginal = $pat0 ne $pat1;
4212             }
4213         }
4214         elsif ( $raw_tokb eq '=>' ) {
4215
4216             # undo marginal flag if patterns match
4217             $is_marginal = $pat0 ne $pat1 || $line_ending_fat_comma;
4218         }
4219         elsif ( $raw_tokb eq '=~' ) {
4220
4221             # undo marginal flag if both lines are semicolon terminated
4222             # and leading patters match
4223             if ( $sc_term1 && $sc_term0 ) {
4224                 $is_marginal = $pat0 ne $pat1;
4225             }
4226         }
4227
4228         #-----------------------------------------------------
4229         # Turn off the marginal flag if we saw an 'if' or 'or'
4230         #-----------------------------------------------------
4231
4232         # A trailing 'if' and 'or' often gives a good alignment
4233         # For example, we can align these:
4234         #  return -1     if $_[0] =~ m/^CHAPT|APPENDIX/;
4235         #  return $1 + 0 if $_[0] =~ m/^SECT(\d*)$/;
4236
4237         # or
4238         #  $d_in_m[2] = 29          if ( &Date_LeapYear($y) );
4239         #  $d         = $d_in_m[$m] if ( $d > $d_in_m[$m] );
4240
4241         if ($saw_if_or) {
4242
4243             # undo marginal flag if both lines are semicolon terminated
4244             if ( $sc_term0 && $sc_term1 ) {
4245                 $is_marginal = 0;
4246             }
4247         }
4248
4249         # For a marginal match, only keep matches before the first 'bad' match
4250         if (   $is_marginal
4251             && defined($jfirst_bad)
4252             && $imax_align > $jfirst_bad - 1 )
4253         {
4254             $imax_align = $jfirst_bad - 1;
4255         }
4256
4257         #----------------------------------------------------------
4258         # Allow sweep to match lines with leading '=' in some cases
4259         #----------------------------------------------------------
4260         if ( $imax_align < 0 && defined($j0_eq_pad) ) {
4261
4262             if (
4263
4264                 # If there is a following line with leading equals, or
4265                 # preceding line with leading equals, then let the sweep align
4266                 # them without restriction.  For example, the first two lines
4267                 # here are a marginal match, but they are followed by a line
4268                 # with leading equals, so the sweep-lr logic can align all of
4269                 # the lines:
4270
4271                 #  $date[1] = $month_to_num{ $date[1] };            # <--line_0
4272                 #  @xdate   = split( /[:\/\s]/, $log->field('t') ); # <--line_1
4273                 #  $day     = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
4274                 #  $time    = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
4275
4276                 # Likewise, if we reverse the two pairs we want the same result
4277
4278                 #  $day     = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
4279                 #  $time    = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
4280                 #  $date[1] = $month_to_num{ $date[1] };            # <--line_0
4281                 #  @xdate   = split( /[:\/\s]/, $log->field('t') ); # <--line_1
4282
4283                 (
4284                        $imax_next >= 0
4285                     || $imax_prev >= 0
4286                     || TEST_MARGINAL_EQ_ALIGNMENT
4287                 )
4288                 && $j0_eq_pad >= -$j0_max_pad
4289                 && $j0_eq_pad <= $j0_max_pad
4290               )
4291             {
4292
4293                 # But do not do this if there is a comma before the '='.
4294                 # For example, the first two lines below have commas and
4295                 # therefore are not allowed to align with lines 3 & 4:
4296
4297                 # my ( $x, $y ) = $self->Size();                      #<--line_0
4298                 # my ( $left, $top, $right, $bottom ) = $self->Window(); #<--l_1
4299                 # my $vx = $right - $left;
4300                 # my $vy = $bottom - $top;
4301
4302                 if ( $rpatterns_0->[0] !~ /,/ && $rpatterns_1->[0] !~ /,/ ) {
4303                     $imax_align = 0;
4304                 }
4305             }
4306         }
4307
4308         return ( $is_marginal, $imax_align );
4309     } ## end sub is_marginal_match
4310 } ## end closure for sub is_marginal_match
4311
4312 sub get_extra_leading_spaces {
4313
4314     my ( $rlines, $rgroups ) = @_;
4315
4316     #----------------------------------------------------------
4317     # Define any extra indentation space (for the -lp option).
4318     # Here is why:
4319     # If a list has side comments, sub scan_list must dump the
4320     # list before it sees everything.  When this happens, it sets
4321     # the indentation to the standard scheme, but notes how
4322     # many spaces it would have liked to use.  We may be able
4323     # to recover that space here in the event that all of the
4324     # lines of a list are back together again.
4325     #----------------------------------------------------------
4326
4327     return 0 unless ( @{$rlines} && @{$rgroups} );
4328
4329     my $object = $rlines->[0]->{'indentation'};
4330     return 0 unless ( ref($object) );
4331     my $extra_leading_spaces            = 0;
4332     my $extra_indentation_spaces_wanted = get_recoverable_spaces($object);
4333     return ($extra_leading_spaces) unless ($extra_indentation_spaces_wanted);
4334
4335     my $min_spaces = $extra_indentation_spaces_wanted;
4336     if ( $min_spaces > 0 ) { $min_spaces = 0 }
4337
4338     # loop over all groups
4339     my $ng      = -1;
4340     my $ngroups = @{$rgroups};
4341     foreach my $item ( @{$rgroups} ) {
4342         $ng++;
4343         my ( $jbeg, $jend ) = @{$item};
4344         foreach my $j ( $jbeg .. $jend ) {
4345             next if ( $j == 0 );
4346
4347             # all indentation objects must be the same
4348             if ( $object != $rlines->[$j]->{'indentation'} ) {
4349                 return 0;
4350             }
4351         }
4352
4353        # find the maximum space without exceeding the line length for this group
4354         my $avail = $rlines->[$jbeg]->get_available_space_on_right();
4355         my $spaces =
4356           ( $avail > $extra_indentation_spaces_wanted )
4357           ? $extra_indentation_spaces_wanted
4358           : $avail;
4359
4360         #--------------------------------------------------------
4361         # Note: min spaces can be negative; for example with -gnu
4362         # f(
4363         #   do { 1; !!(my $x = bless []); }
4364         #  );
4365         #--------------------------------------------------------
4366         # The following rule is needed to match older formatting:
4367         # For multiple groups, we will keep spaces non-negative.
4368         # For a single group, we will allow a negative space.
4369         if ( $ngroups > 1 && $spaces < 0 ) { $spaces = 0 }
4370
4371         # update the minimum spacing
4372         if ( $ng == 0 || $spaces < $extra_leading_spaces ) {
4373             $extra_leading_spaces = $spaces;
4374         }
4375     }
4376
4377     # update the indentation object because with -icp the terminal
4378     # ');' will use the same adjustment.
4379     $object->permanently_decrease_available_spaces( -$extra_leading_spaces );
4380     return $extra_leading_spaces;
4381 } ## end sub get_extra_leading_spaces
4382
4383 sub forget_side_comment {
4384     my ($self) = @_;
4385     $self->[_last_side_comment_column_] = 0;
4386     return;
4387 }
4388
4389 sub is_good_side_comment_column {
4390     my ( $self, $line, $line_number, $level, $num5 ) = @_;
4391
4392     # Upon encountering the first side comment of a group, decide if
4393     # a previous side comment should be forgotten.  This involves
4394     # checking several rules.
4395
4396     # Return true to KEEP old comment location
4397     # Return false to FORGET old comment location
4398     my $KEEP   = 1;
4399     my $FORGET = 0;
4400
4401     my $rfields                 = $line->{'rfields'};
4402     my $is_hanging_side_comment = $line->{'is_hanging_side_comment'};
4403
4404     # RULE1: Never forget comment before a hanging side comment
4405     return $KEEP if ($is_hanging_side_comment);
4406
4407     # RULE2: Forget a side comment after a short line difference,
4408     # where 'short line difference' is computed from a formula.
4409     # Using a smooth formula helps minimize sudden large changes.
4410     my $line_diff = $line_number - $self->[_last_side_comment_line_number_];
4411     my $alev_diff = abs( $level - $self->[_last_side_comment_level_] );
4412
4413     # '$num5' is the number of comments in the first 5 lines after the first
4414     # comment.  It is needed to keep a compact group of side comments from
4415     # being influenced by a more distant side comment.
4416     $num5 = 1 unless ($num5);
4417
4418     # Some values:
4419
4420     #        $adiff  $num5   $short_diff
4421     #        0       *       12
4422     #        1       1       6
4423     #        1       2       4
4424     #        1       3       3
4425     #        1       4       2
4426     #        2       1       4
4427     #        2       2       2
4428     #        2       3       1
4429     #        3       1       3
4430     #        3       2       1
4431
4432     my $short_diff = SC_LONG_LINE_DIFF / ( 1 + $alev_diff * $num5 );
4433
4434     return $FORGET
4435       if ( $line_diff > $short_diff
4436         || !$self->[_rOpts_valign_side_comments_] );
4437
4438     # RULE3: Forget a side comment if this line is at lower level and
4439     # ends a block
4440     my $last_sc_level = $self->[_last_side_comment_level_];
4441     return $FORGET
4442       if ( $level < $last_sc_level
4443         && $is_closing_block_type{ substr( $rfields->[0], 0, 1 ) } );
4444
4445     # RULE 4: Forget the last side comment if this comment might join a cached
4446     # line ...
4447     if ( my $cached_line_type = get_cached_line_type() ) {
4448
4449         # ... otherwise side comment alignment will get messed up.
4450         # For example, in the following test script
4451         # with using 'perltidy -sct -act=2', the last comment would try to
4452         # align with the previous and then be in the wrong column when
4453         # the lines are combined:
4454
4455         # foreach $line (
4456         #    [0, 1, 2], [3, 4, 5], [6, 7, 8],    # rows
4457         #    [0, 3, 6], [1, 4, 7], [2, 5, 8],    # columns
4458         #    [0, 4, 8], [2, 4, 6]
4459         #  )                                     # diagonals
4460         return $FORGET
4461           if ( $cached_line_type == 2 || $cached_line_type == 4 );
4462     }
4463
4464     # Otherwise, keep it alive
4465     return $KEEP;
4466 } ## end sub is_good_side_comment_column
4467
4468 sub align_side_comments {
4469
4470     my ( $self, $rlines, $rgroups ) = @_;
4471
4472     # Align any side comments in this batch of lines
4473
4474     # Given:
4475     #  $rlines  - the lines
4476     #  $rgroups - the partition of the lines into groups
4477     #
4478     # We will be working group-by-group because all side comments
4479     # (real or fake) in each group are already aligned. So we just have
4480     # to make alignments between groups wherever possible.
4481
4482     # An unusual aspect is that within each group we have aligned both real
4483     # and fake side comments.  This has the consequence that the lengths of
4484     # long lines without real side comments can cause 'push' all side comments
4485     # to the right.  This seems unusual, but testing with and without this
4486     # feature shows that it is usually better this way.  Otherwise, side
4487     # comments can be hidden between long lines without side comments and
4488     # thus be harder to read.
4489
4490     my $group_level        = $self->[_group_level_];
4491     my $continuing_sc_flow = $self->[_last_side_comment_length_] > 0
4492       && $group_level == $self->[_last_level_written_];
4493
4494     # Find groups with side comments, and remember the first nonblank comment
4495     my $j_sc_beg;
4496     my @todo;
4497     my $ng = -1;
4498     foreach my $item ( @{$rgroups} ) {
4499         $ng++;
4500         my ( $jbeg, $jend ) = @{$item};
4501         foreach my $j ( $jbeg .. $jend ) {
4502             my $line = $rlines->[$j];
4503             my $jmax = $line->{'jmax'};
4504             if ( $line->{'rfield_lengths'}->[$jmax] ) {
4505
4506                 # this group has a line with a side comment
4507                 push @todo, $ng;
4508                 if ( !defined($j_sc_beg) ) {
4509                     $j_sc_beg = $j;
4510                 }
4511                 last;
4512             }
4513         }
4514     }
4515
4516     # done if no groups with side comments
4517     return unless @todo;
4518
4519     # Count $num5 = number of comments in the 5 lines after the first comment
4520     # This is an important factor in a decision formula
4521     my $num5 = 1;
4522     foreach my $jj ( $j_sc_beg + 1 .. @{$rlines} - 1 ) {
4523         my $ldiff = $jj - $j_sc_beg;
4524         last if ( $ldiff > 5 );
4525         my $line   = $rlines->[$jj];
4526         my $jmax   = $line->{'jmax'};
4527         my $sc_len = $line->{'rfield_lengths'}->[$jmax];
4528         next unless ($sc_len);
4529         $num5++;
4530     }
4531
4532     # Forget the old side comment location if necessary
4533     my $line_0 = $rlines->[$j_sc_beg];
4534     my $lnum =
4535       $j_sc_beg + $self->[_file_writer_object_]->get_output_line_number();
4536     my $keep_it =
4537       $self->is_good_side_comment_column( $line_0, $lnum, $group_level, $num5 );
4538     my $last_side_comment_column =
4539       $keep_it ? $self->[_last_side_comment_column_] : 0;
4540
4541     # If there are multiple groups we will do two passes
4542     # so that we can find a common alignment for all groups.
4543     my $MAX_PASS = @todo > 1 ? 2 : 1;
4544
4545     # Loop over passes
4546     my $max_comment_column = $last_side_comment_column;
4547     foreach my $PASS ( 1 .. $MAX_PASS ) {
4548
4549         # If there are two passes, then on the last pass make the old column
4550         # equal to the largest of the group.  This will result in the comments
4551         # being aligned if possible.
4552         if ( $PASS == $MAX_PASS ) {
4553             $last_side_comment_column = $max_comment_column;
4554         }
4555
4556         # Loop over the groups with side comments
4557         my $column_limit;
4558         foreach my $ng (@todo) {
4559             my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
4560
4561             # Note that since all lines in a group have common alignments, we
4562             # just have to work on one of the lines (the first line).
4563             my $line                    = $rlines->[$jbeg];
4564             my $jmax                    = $line->{'jmax'};
4565             my $is_hanging_side_comment = $line->{'is_hanging_side_comment'};
4566             last
4567               if ( $PASS < $MAX_PASS && $is_hanging_side_comment );
4568
4569             # the maximum space without exceeding the line length:
4570             my $avail = $line->get_available_space_on_right();
4571
4572             # try to use the previous comment column
4573             my $side_comment_column = $line->get_column( $jmax - 1 );
4574             my $move = $last_side_comment_column - $side_comment_column;
4575
4576             # Remember the maximum possible column of the first line with
4577             # side comment
4578             if ( !defined($column_limit) ) {
4579                 $column_limit = $side_comment_column + $avail;
4580             }
4581
4582             next if ( $jmax <= 0 );
4583
4584             # but if this doesn't work, give up and use the minimum space
4585             my $min_move = $self->[_rOpts_minimum_space_to_comment_] - 1;
4586             if ( $move > $avail ) {
4587                 $move = $min_move;
4588             }
4589
4590             # but we want some minimum space to the comment
4591             if (   $move >= 0
4592                 && $j_sc_beg == 0
4593                 && $continuing_sc_flow )
4594             {
4595                 $min_move = 0;
4596             }
4597
4598             # remove constraints on hanging side comments
4599             if ($is_hanging_side_comment) { $min_move = 0 }
4600
4601             if ( $move < $min_move ) {
4602                 $move = $min_move;
4603             }
4604
4605             # don't exceed the available space
4606             if ( $move > $avail ) { $move = $avail }
4607
4608             # We can only increase space, never decrease.
4609             if ( $move < 0 ) { $move = 0 }
4610
4611             # Discover the largest column on the preliminary  pass
4612             if ( $PASS < $MAX_PASS ) {
4613                 my $col = $line->get_column( $jmax - 1 ) + $move;
4614
4615                 # but ignore columns too large for the starting line
4616                 if ( $col > $max_comment_column && $col < $column_limit ) {
4617                     $max_comment_column = $col;
4618                 }
4619             }
4620
4621             # Make the changes on the final pass
4622             else {
4623                 $line->increase_field_width( $jmax - 1, $move );
4624
4625                 # remember this column for the next group
4626                 $last_side_comment_column = $line->get_column( $jmax - 1 );
4627             }
4628         } ## end loop over groups
4629     } ## end loop over passes
4630
4631     # Find the last side comment
4632     my $j_sc_last;
4633     my $ng_last = $todo[-1];
4634     my ( $jbeg, $jend ) = @{ $rgroups->[$ng_last] };
4635     foreach my $jj ( reverse( $jbeg .. $jend ) ) {
4636         my $line = $rlines->[$jj];
4637         my $jmax = $line->{'jmax'};
4638         if ( $line->{'rfield_lengths'}->[$jmax] ) {
4639             $j_sc_last = $jj;
4640             last;
4641         }
4642     }
4643
4644     # Save final side comment info for possible use by the next batch
4645     if ( defined($j_sc_last) ) {
4646         my $line_number =
4647           $self->[_file_writer_object_]->get_output_line_number() + $j_sc_last;
4648         $self->[_last_side_comment_column_]      = $last_side_comment_column;
4649         $self->[_last_side_comment_line_number_] = $line_number;
4650         $self->[_last_side_comment_level_]       = $group_level;
4651     }
4652     return;
4653 } ## end sub align_side_comments
4654
4655 ###############################
4656 # CODE SECTION 6: Output Step A
4657 ###############################
4658
4659 sub valign_output_step_A {
4660
4661     #------------------------------------------------------------
4662     # This is Step A in writing vertically aligned lines.
4663     # The line is prepared according to the alignments which have
4664     # been found. Then it is shipped to the next step.
4665     #------------------------------------------------------------
4666
4667     my ( $self, $rinput_hash ) = @_;
4668
4669     my $line                 = $rinput_hash->{line};
4670     my $min_ci_gap           = $rinput_hash->{min_ci_gap};
4671     my $do_not_align         = $rinput_hash->{do_not_align};
4672     my $group_leader_length  = $rinput_hash->{group_leader_length};
4673     my $extra_leading_spaces = $rinput_hash->{extra_leading_spaces};
4674     my $level                = $rinput_hash->{level};
4675     my $maximum_line_length  = $rinput_hash->{maximum_line_length};
4676
4677     my $rfields                   = $line->{'rfields'};
4678     my $rfield_lengths            = $line->{'rfield_lengths'};
4679     my $leading_space_count       = $line->{'leading_space_count'};
4680     my $outdent_long_lines        = $line->{'outdent_long_lines'};
4681     my $maximum_field_index       = $line->{'jmax'};
4682     my $rvertical_tightness_flags = $line->{'rvertical_tightness_flags'};
4683     my $Kend                      = $line->{'Kend'};
4684     my $level_end                 = $line->{'level_end'};
4685
4686     # Check for valid hash keys at end of lifetime of $line during development
4687     DEVEL_MODE
4688       && check_keys( $line, \%valid_LINE_keys,
4689         "Checking line keys at valign_output_step_A", 1 );
4690
4691     # add any extra spaces
4692     if ( $leading_space_count > $group_leader_length ) {
4693         $leading_space_count += $min_ci_gap;
4694     }
4695
4696     my $str     = $rfields->[0];
4697     my $str_len = $rfield_lengths->[0];
4698
4699     my @alignments = @{ $line->{'ralignments'} };
4700     if ( @alignments != $maximum_field_index + 1 ) {
4701
4702         # Shouldn't happen: sub install_new_alignments makes jmax alignments
4703         my $jmax_alignments = @alignments - 1;
4704         if (DEVEL_MODE) {
4705             Fault(
4706 "alignment jmax=$jmax_alignments should equal $maximum_field_index\n"
4707             );
4708         }
4709         $do_not_align = 1;
4710     }
4711
4712     # loop to concatenate all fields of this line and needed padding
4713     my $total_pad_count = 0;
4714     for my $j ( 1 .. $maximum_field_index ) {
4715
4716         # skip zero-length side comments
4717         last
4718           if (
4719             ( $j == $maximum_field_index )
4720             && ( !defined( $rfields->[$j] )
4721                 || ( $rfield_lengths->[$j] == 0 ) )
4722           );
4723
4724         # compute spaces of padding before this field
4725         my $col = $alignments[ $j - 1 ]->{'column'};
4726         my $pad = $col - ( $str_len + $leading_space_count );
4727
4728         if ($do_not_align) {
4729             $pad =
4730               ( $j < $maximum_field_index )
4731               ? 0
4732               : $self->[_rOpts_minimum_space_to_comment_] - 1;
4733         }
4734
4735         # if the -fpsc flag is set, move the side comment to the selected
4736         # column if and only if it is possible, ignoring constraints on
4737         # line length and minimum space to comment
4738         if (   $self->[_rOpts_fixed_position_side_comment_]
4739             && $j == $maximum_field_index )
4740         {
4741             my $newpad =
4742               $pad + $self->[_rOpts_fixed_position_side_comment_] - $col - 1;
4743             if ( $newpad >= 0 ) { $pad = $newpad; }
4744         }
4745
4746         # accumulate the padding
4747         if ( $pad > 0 ) { $total_pad_count += $pad; }
4748
4749         # only add padding when we have a finite field;
4750         # this avoids extra terminal spaces if we have empty fields
4751         if ( $rfield_lengths->[$j] > 0 ) {
4752             $str .= SPACE x $total_pad_count;
4753             $str_len += $total_pad_count;
4754             $total_pad_count = 0;
4755             $str .= $rfields->[$j];
4756             $str_len += $rfield_lengths->[$j];
4757         }
4758         else {
4759             $total_pad_count = 0;
4760         }
4761     }
4762
4763     my $side_comment_length = $rfield_lengths->[$maximum_field_index];
4764
4765     # ship this line off
4766     $self->valign_output_step_B(
4767         {
4768             leading_space_count => $leading_space_count + $extra_leading_spaces,
4769             line                => $str,
4770             line_length         => $str_len,
4771             side_comment_length => $side_comment_length,
4772             outdent_long_lines  => $outdent_long_lines,
4773             rvertical_tightness_flags => $rvertical_tightness_flags,
4774             level                     => $level,
4775             level_end                 => $level_end,
4776             Kend                      => $Kend,
4777             maximum_line_length       => $maximum_line_length,
4778         }
4779     );
4780     return;
4781 } ## end sub valign_output_step_A
4782
4783 sub combine_fields {
4784
4785     # We have a group of two lines for which we do not want to align tokens
4786     # between index $imax_align and the side comment.  So we will delete fields
4787     # between $imax_align and the side comment.  Alignments have already
4788     # been set so we have to adjust them.
4789
4790     my ( $line_0, $line_1, $imax_align ) = @_;
4791
4792     if ( !defined($imax_align) ) { $imax_align = -1 }
4793
4794     # First delete the unwanted tokens
4795     my $jmax_old = $line_0->{'jmax'};
4796     my @idel     = ( $imax_align + 1 .. $jmax_old - 2 );
4797     return unless (@idel);
4798
4799     # Get old alignments before any changes are made
4800     my @old_alignments = @{ $line_0->{'ralignments'} };
4801
4802     foreach my $line ( $line_0, $line_1 ) {
4803         delete_selected_tokens( $line, \@idel );
4804     }
4805
4806     # Now adjust the alignments.  Note that the side comment alignment
4807     # is always at jmax-1, and there is an ending alignment at jmax.
4808     my @new_alignments;
4809     if ( $imax_align >= 0 ) {
4810         @new_alignments[ 0 .. $imax_align ] =
4811           @old_alignments[ 0 .. $imax_align ];
4812     }
4813
4814     my $jmax_new = $line_0->{'jmax'};
4815
4816     $new_alignments[ $jmax_new - 1 ] = $old_alignments[ $jmax_old - 1 ];
4817     $new_alignments[$jmax_new]       = $old_alignments[$jmax_old];
4818     $line_0->{'ralignments'}         = \@new_alignments;
4819     $line_1->{'ralignments'}         = \@new_alignments;
4820     return;
4821 } ## end sub combine_fields
4822
4823 sub get_output_line_number {
4824
4825     # The output line number reported to a caller =
4826     # the number of items still in the buffer +
4827     # the number of items written.
4828     return $_[0]->group_line_count() +
4829       $_[0]->[_file_writer_object_]->get_output_line_number();
4830 } ## end sub get_output_line_number
4831
4832 ###############################
4833 # CODE SECTION 7: Output Step B
4834 ###############################
4835
4836 {    ## closure for sub valign_output_step_B
4837
4838     # These are values for a cache used by valign_output_step_B.
4839     my $cached_line_text;
4840     my $cached_line_text_length;
4841     my $cached_line_type;
4842     my $cached_line_opening_flag;
4843     my $cached_line_closing_flag;
4844     my $cached_seqno;
4845     my $cached_line_valid;
4846     my $cached_line_leading_space_count;
4847     my $cached_seqno_string;
4848     my $cached_line_Kend;
4849     my $cached_line_maximum_length;
4850
4851     # These are passed to step_C:
4852     my $seqno_string;
4853     my $last_nonblank_seqno_string;
4854
4855     sub set_last_nonblank_seqno_string {
4856         my ($val) = @_;
4857         $last_nonblank_seqno_string = $val;
4858         return;
4859     }
4860
4861     sub get_cached_line_opening_flag {
4862         return $cached_line_opening_flag;
4863     }
4864
4865     sub get_cached_line_type {
4866         return $cached_line_type;
4867     }
4868
4869     sub set_cached_line_valid {
4870         my ($val) = @_;
4871         $cached_line_valid = $val;
4872         return;
4873     }
4874
4875     sub get_cached_seqno {
4876         return $cached_seqno;
4877     }
4878
4879     sub initialize_step_B_cache {
4880
4881         # valign_output_step_B cache:
4882         $cached_line_text                = EMPTY_STRING;
4883         $cached_line_text_length         = 0;
4884         $cached_line_type                = 0;
4885         $cached_line_opening_flag        = 0;
4886         $cached_line_closing_flag        = 0;
4887         $cached_seqno                    = 0;
4888         $cached_line_valid               = 0;
4889         $cached_line_leading_space_count = 0;
4890         $cached_seqno_string             = EMPTY_STRING;
4891         $cached_line_Kend                = undef;
4892         $cached_line_maximum_length      = undef;
4893
4894         # These vars hold a string of sequence numbers joined together used by
4895         # the cache
4896         $seqno_string               = EMPTY_STRING;
4897         $last_nonblank_seqno_string = EMPTY_STRING;
4898         return;
4899     } ## end sub initialize_step_B_cache
4900
4901     sub _flush_step_B_cache {
4902         my ($self) = @_;
4903
4904         # Send any text in the step_B cache on to step_C
4905         if ($cached_line_type) {
4906             $seqno_string = $cached_seqno_string;
4907             $self->valign_output_step_C(
4908                 $seqno_string,
4909                 $last_nonblank_seqno_string,
4910
4911                 $cached_line_text,
4912                 $cached_line_leading_space_count,
4913                 $self->[_last_level_written_],
4914                 $cached_line_Kend,
4915             );
4916             $cached_line_type           = 0;
4917             $cached_line_text           = EMPTY_STRING;
4918             $cached_line_text_length    = 0;
4919             $cached_seqno_string        = EMPTY_STRING;
4920             $cached_line_Kend           = undef;
4921             $cached_line_maximum_length = undef;
4922         }
4923         return;
4924     } ## end sub _flush_step_B_cache
4925
4926     sub handle_cached_line {
4927
4928         my ( $self, $rinput, $leading_string, $leading_string_length ) = @_;
4929
4930         # The cached line will either be:
4931         # - passed along to step_C, or
4932         # - or combined with the current line
4933
4934         my $last_level_written = $self->[_last_level_written_];
4935
4936         my $leading_space_count       = $rinput->{leading_space_count};
4937         my $str                       = $rinput->{line};
4938         my $str_length                = $rinput->{line_length};
4939         my $rvertical_tightness_flags = $rinput->{rvertical_tightness_flags};
4940         my $level                     = $rinput->{level};
4941         my $level_end                 = $rinput->{level_end};
4942         my $maximum_line_length       = $rinput->{maximum_line_length};
4943
4944         my ( $open_or_close, $opening_flag, $closing_flag, $seqno, $valid,
4945             $seqno_beg, $seqno_end );
4946         if ($rvertical_tightness_flags) {
4947
4948             $open_or_close = $rvertical_tightness_flags->{_vt_type};
4949             $seqno_beg     = $rvertical_tightness_flags->{_vt_seqno_beg};
4950         }
4951
4952         # Dump an invalid cached line
4953         if ( !$cached_line_valid ) {
4954             $self->valign_output_step_C(
4955                 $seqno_string,
4956                 $last_nonblank_seqno_string,
4957
4958                 $cached_line_text,
4959                 $cached_line_leading_space_count,
4960                 $last_level_written,
4961                 $cached_line_Kend,
4962             );
4963         }
4964
4965         # Handle cached line ending in OPENING tokens
4966         elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
4967
4968             my $gap = $leading_space_count - $cached_line_text_length;
4969
4970             # handle option of just one tight opening per line:
4971             if ( $cached_line_opening_flag == 1 ) {
4972                 if ( defined($open_or_close) && $open_or_close == 1 ) {
4973                     $gap = -1;
4974                 }
4975             }
4976
4977             # Do not join the lines if this might produce a one-line
4978             # container which exceeds the maximum line length.  This is
4979             # necessary prevent blinking, particularly with the combination
4980             # -xci -pvt=2.  In that case a one-line block alternately forms
4981             # and breaks, causing -xci to alternately turn on and off (case
4982             # b765).
4983             # Patched to fix cases b656 b862 b971 b972: always do the check
4984             # if the maximum line length changes (due to -vmll).
4985             if (
4986                 $gap >= 0
4987                 && ( $maximum_line_length != $cached_line_maximum_length
4988                     || ( defined($level_end) && $level > $level_end ) )
4989               )
4990             {
4991                 my $test_line_length =
4992                   $cached_line_text_length + $gap + $str_length;
4993
4994                 # Add a small tolerance in the length test (fixes case b862)
4995                 if ( $test_line_length > $cached_line_maximum_length - 2 ) {
4996                     $gap = -1;
4997                 }
4998             }
4999
5000             if ( $gap >= 0 && defined($seqno_beg) ) {
5001                 $maximum_line_length   = $cached_line_maximum_length;
5002                 $leading_string        = $cached_line_text . SPACE x $gap;
5003                 $leading_string_length = $cached_line_text_length + $gap;
5004                 $leading_space_count   = $cached_line_leading_space_count;
5005                 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
5006                 $level        = $last_level_written;
5007             }
5008             else {
5009                 $self->valign_output_step_C(
5010                     $seqno_string,
5011                     $last_nonblank_seqno_string,
5012
5013                     $cached_line_text,
5014                     $cached_line_leading_space_count,
5015                     $last_level_written,
5016                     $cached_line_Kend,
5017                 );
5018             }
5019         }
5020
5021         # Handle cached line ending in CLOSING tokens
5022         else {
5023             my $test_line =
5024               $cached_line_text . SPACE x $cached_line_closing_flag . $str;
5025             my $test_line_length =
5026               $cached_line_text_length +
5027               $cached_line_closing_flag +
5028               $str_length;
5029             if (
5030
5031                 # The new line must start with container
5032                 $seqno_beg
5033
5034                 # The container combination must be okay..
5035                 && (
5036
5037                     # okay to combine like types
5038                     ( $open_or_close == $cached_line_type )
5039
5040                     # closing block brace may append to non-block
5041                     || ( $cached_line_type == 2 && $open_or_close == 4 )
5042
5043                     # something like ');'
5044                     || ( !$open_or_close && $cached_line_type == 2 )
5045
5046                 )
5047
5048                 # The combined line must fit
5049                 && ( $test_line_length <= $cached_line_maximum_length )
5050               )
5051             {
5052
5053                 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
5054
5055                 # Patch to outdent closing tokens ending # in ');' If we
5056                 # are joining a line like ');' to a previous stacked set of
5057                 # closing tokens, then decide if we may outdent the
5058                 # combined stack to the indentation of the ');'.  Since we
5059                 # should not normally outdent any of the other tokens more
5060                 # than the indentation of the lines that contained them, we
5061                 # will only do this if all of the corresponding opening
5062                 # tokens were on the same line.  This can happen with -sot
5063                 # and -sct.
5064
5065                 # For example, it is ok here:
5066                 #   __PACKAGE__->load_components( qw(
5067                 #         PK::Auto
5068                 #         Core
5069                 #   ));
5070                 #
5071                 # But, for example, we do not outdent in this example
5072                 # because that would put the closing sub brace out farther
5073                 # than the opening sub brace:
5074                 #
5075                 #   perltidy -sot -sct
5076                 #   $c->Tk::bind(
5077                 #       '<Control-f>' => sub {
5078                 #           my ($c) = @_;
5079                 #           my $e = $c->XEvent;
5080                 #           itemsUnderArea $c;
5081                 #       } );
5082                 #
5083                 if (   $str =~ /^\);/
5084                     && $cached_line_text =~ /^[\)\}\]\s]*$/ )
5085                 {
5086
5087                     # The way to tell this is if the stacked sequence
5088                     # numbers of this output line are the reverse of the
5089                     # stacked sequence numbers of the previous non-blank
5090                     # line of sequence numbers.  So we can join if the
5091                     # previous nonblank string of tokens is the mirror
5092                     # image.  For example if stack )}] is 13:8:6 then we
5093                     # are looking for a leading stack like [{( which
5094                     # is 6:8:13. We only need to check the two ends,
5095                     # because the intermediate tokens must fall in order.
5096                     # Note on speed: having to split on colons and
5097                     # eliminate multiple colons might appear to be slow,
5098                     # but it's not an issue because we almost never come
5099                     # through here.  In a typical file we don't.
5100
5101                     $seqno_string               =~ s/^:+//;
5102                     $last_nonblank_seqno_string =~ s/^:+//;
5103                     $seqno_string               =~ s/:+/:/g;
5104                     $last_nonblank_seqno_string =~ s/:+/:/g;
5105
5106                     # how many spaces can we outdent?
5107                     my $diff =
5108                       $cached_line_leading_space_count - $leading_space_count;
5109                     if (   $diff > 0
5110                         && length($seqno_string)
5111                         && length($last_nonblank_seqno_string) ==
5112                         length($seqno_string) )
5113                     {
5114                         my @seqno_last =
5115                           ( split /:/, $last_nonblank_seqno_string );
5116                         my @seqno_now = ( split /:/, $seqno_string );
5117                         if (   @seqno_now
5118                             && @seqno_last
5119                             && $seqno_now[-1] == $seqno_last[0]
5120                             && $seqno_now[0] == $seqno_last[-1] )
5121                         {
5122
5123                             # OK to outdent ..
5124                             # for absolute safety, be sure we only remove
5125                             # whitespace
5126                             my $ws = substr( $test_line, 0, $diff );
5127                             if ( ( length($ws) == $diff )
5128                                 && $ws =~ /^\s+$/ )
5129                             {
5130
5131                                 $test_line = substr( $test_line, $diff );
5132                                 $cached_line_leading_space_count -= $diff;
5133                                 $last_level_written =
5134                                   $self->level_change(
5135                                     $cached_line_leading_space_count,
5136                                     $diff, $last_level_written );
5137                                 $self->reduce_valign_buffer_indentation($diff);
5138                             }
5139
5140                             # shouldn't happen, but not critical:
5141                             ##else {
5142                             ## ERROR transferring indentation here
5143                             ##}
5144                         }
5145                     }
5146                 }
5147
5148                 # Change the args to look like we received the combined line
5149                 $str                   = $test_line;
5150                 $str_length            = $test_line_length;
5151                 $leading_string        = EMPTY_STRING;
5152                 $leading_string_length = 0;
5153                 $leading_space_count   = $cached_line_leading_space_count;
5154                 $level                 = $last_level_written;
5155                 $maximum_line_length   = $cached_line_maximum_length;
5156             }
5157             else {
5158                 $self->valign_output_step_C(
5159                     $seqno_string,
5160                     $last_nonblank_seqno_string,
5161
5162                     $cached_line_text,
5163                     $cached_line_leading_space_count,
5164                     $last_level_written,
5165                     $cached_line_Kend,
5166                 );
5167             }
5168         }
5169         return ( $str, $str_length, $leading_string, $leading_string_length,
5170             $leading_space_count, $level, $maximum_line_length, );
5171
5172     } ## end sub handle_cached_line
5173
5174     sub valign_output_step_B {
5175
5176         #---------------------------------------------------------
5177         # This is Step B in writing vertically aligned lines.
5178         # Vertical tightness is applied according to preset flags.
5179         # In particular this routine handles stacking of opening
5180         # and closing tokens.
5181         #---------------------------------------------------------
5182
5183         my ( $self, $rinput ) = @_;
5184
5185         my $leading_space_count       = $rinput->{leading_space_count};
5186         my $str                       = $rinput->{line};
5187         my $str_length                = $rinput->{line_length};
5188         my $side_comment_length       = $rinput->{side_comment_length};
5189         my $outdent_long_lines        = $rinput->{outdent_long_lines};
5190         my $rvertical_tightness_flags = $rinput->{rvertical_tightness_flags};
5191         my $level                     = $rinput->{level};
5192         my $level_end                 = $rinput->{level_end};
5193         my $Kend                      = $rinput->{Kend};
5194         my $maximum_line_length       = $rinput->{maximum_line_length};
5195
5196         # Useful -gcs test cases for wide characters are
5197         # perl527/(method.t.2, reg_mesg.t, mime-header.t)
5198
5199         # handle outdenting of long lines:
5200         my $is_outdented_line;
5201         if ($outdent_long_lines) {
5202             my $excess =
5203               $str_length -
5204               $side_comment_length +
5205               $leading_space_count -
5206               $maximum_line_length;
5207             if ( $excess > 0 ) {
5208                 $leading_space_count = 0;
5209                 my $file_writer_object = $self->[_file_writer_object_];
5210                 my $last_outdented_line_at =
5211                   $file_writer_object->get_output_line_number();
5212                 $self->[_last_outdented_line_at_] = $last_outdented_line_at;
5213
5214                 my $outdented_line_count = $self->[_outdented_line_count_];
5215                 unless ($outdented_line_count) {
5216                     $self->[_first_outdented_line_at_] =
5217                       $last_outdented_line_at;
5218                 }
5219                 $outdented_line_count++;
5220                 $self->[_outdented_line_count_] = $outdented_line_count;
5221                 $is_outdented_line = 1;
5222             }
5223         }
5224
5225         # Make preliminary leading whitespace.  It could get changed
5226         # later by entabbing, so we have to keep track of any changes
5227         # to the leading_space_count from here on.
5228         my $leading_string =
5229           $leading_space_count > 0
5230           ? ( SPACE x $leading_space_count )
5231           : EMPTY_STRING;
5232         my $leading_string_length = length($leading_string);
5233
5234         # Unpack any recombination data; it was packed by
5235         # sub 'Formatter::set_vertical_tightness_flags'
5236
5237         # old   hash              Meaning
5238         # index key
5239         #
5240         # 0   _vt_type:           1=opening non-block    2=closing non-block
5241         #                         3=opening block brace  4=closing block brace
5242         #
5243         # 1a  _vt_opening_flag:  1=no multiple steps, 2=multiple steps ok
5244         # 1b  _vt_closing_flag:    spaces of padding to use if closing
5245         # 2   _vt_seqno:          sequence number of container
5246         # 3   _vt_valid flag:     do not append if this flag is false. Will be
5247         #           true if appropriate -vt flag is set.  Otherwise, Will be
5248         #           made true only for 2 line container in parens with -lp
5249         # 4   _vt_seqno_beg:      sequence number of first token of line
5250         # 5   _vt_seqno_end:      sequence number of last token of line
5251         # 6   _vt_min_lines:      min number of lines for joining opening cache,
5252         #                           0=no constraint
5253         # 7   _vt_max_lines:      max number of lines for joining opening cache,
5254         #                           0=no constraint
5255
5256         my ( $open_or_close, $opening_flag, $closing_flag, $seqno, $valid,
5257             $seqno_beg, $seqno_end );
5258         if ($rvertical_tightness_flags) {
5259
5260             $open_or_close = $rvertical_tightness_flags->{_vt_type};
5261             $opening_flag  = $rvertical_tightness_flags->{_vt_opening_flag};
5262             $closing_flag  = $rvertical_tightness_flags->{_vt_closing_flag};
5263             $seqno         = $rvertical_tightness_flags->{_vt_seqno};
5264             $valid         = $rvertical_tightness_flags->{_vt_valid_flag};
5265             $seqno_beg     = $rvertical_tightness_flags->{_vt_seqno_beg};
5266             $seqno_end     = $rvertical_tightness_flags->{_vt_seqno_end};
5267         }
5268
5269         $seqno_string = $seqno_end;
5270
5271         # handle any cached line ..
5272         # either append this line to it or write it out
5273         # Note: the function length() is used in this next test out of caution.
5274         # All testing has shown that the variable $cached_line_text_length is
5275         # correct, but its calculation is complex and a loss of cached text
5276         # would be a disaster.
5277         if ( length($cached_line_text) ) {
5278
5279             (
5280                 $str,
5281                 $str_length,
5282                 $leading_string,
5283                 $leading_string_length,
5284                 $leading_space_count,
5285                 $level,
5286                 $maximum_line_length
5287
5288             ) = $self->handle_cached_line( $rinput, $leading_string,
5289                 $leading_string_length );
5290
5291             $cached_line_type           = 0;
5292             $cached_line_text           = EMPTY_STRING;
5293             $cached_line_text_length    = 0;
5294             $cached_line_Kend           = undef;
5295             $cached_line_maximum_length = undef;
5296
5297         }
5298
5299         # make the line to be written
5300         my $line        = $leading_string . $str;
5301         my $line_length = $leading_string_length + $str_length;
5302
5303         # Safety check: be sure that a line to be cached as a stacked block
5304         # brace line ends in the appropriate opening or closing block brace.
5305         # This should always be the case if the caller set flags correctly.
5306         # Code '3' is for -sobb, code '4' is for -scbb.
5307         if ($open_or_close) {
5308             if (   $open_or_close == 3 && $line !~ /\{\s*$/
5309                 || $open_or_close == 4 && $line !~ /\}\s*$/ )
5310             {
5311                 $open_or_close = 0;
5312             }
5313         }
5314
5315         # write or cache this line ...
5316         # fix for case b999: do not cache an outdented line
5317         # fix for b1378: do not cache an empty line
5318         if (  !$open_or_close
5319             || $side_comment_length > 0
5320             || $is_outdented_line
5321             || !$line_length )
5322         {
5323             $self->valign_output_step_C(
5324                 $seqno_string,
5325                 $last_nonblank_seqno_string,
5326
5327                 $line,
5328                 $leading_space_count,
5329                 $level,
5330                 $Kend,
5331             );
5332         }
5333         else {
5334             $cached_line_text                = $line;
5335             $cached_line_text_length         = $line_length;
5336             $cached_line_type                = $open_or_close;
5337             $cached_line_opening_flag        = $opening_flag;
5338             $cached_line_closing_flag        = $closing_flag;
5339             $cached_seqno                    = $seqno;
5340             $cached_line_valid               = $valid;
5341             $cached_line_leading_space_count = $leading_space_count;
5342             $cached_seqno_string             = $seqno_string;
5343             $cached_line_Kend                = $Kend;
5344             $cached_line_maximum_length      = $maximum_line_length;
5345         }
5346
5347         $self->[_last_level_written_]       = $level;
5348         $self->[_last_side_comment_length_] = $side_comment_length;
5349         return;
5350     } ## end sub valign_output_step_B
5351 }
5352
5353 ###############################
5354 # CODE SECTION 8: Output Step C
5355 ###############################
5356
5357 {    ## closure for sub valign_output_step_C
5358
5359     # Vertical alignment buffer used by valign_output_step_C
5360     my $valign_buffer_filling;
5361     my @valign_buffer;
5362
5363     sub initialize_valign_buffer {
5364         @valign_buffer         = ();
5365         $valign_buffer_filling = EMPTY_STRING;
5366         return;
5367     }
5368
5369     sub dump_valign_buffer {
5370         my ($self) = @_;
5371
5372         # Send all lines in the current buffer on to step_D
5373         if (@valign_buffer) {
5374             foreach (@valign_buffer) {
5375                 $self->valign_output_step_D( @{$_} );
5376             }
5377             @valign_buffer = ();
5378         }
5379         $valign_buffer_filling = EMPTY_STRING;
5380         return;
5381     } ## end sub dump_valign_buffer
5382
5383     sub reduce_valign_buffer_indentation {
5384
5385         my ( $self, $diff ) = @_;
5386
5387         # Reduce the leading indentation of lines in the current
5388         # buffer by $diff spaces
5389         if ( $valign_buffer_filling && $diff ) {
5390             my $max_valign_buffer = @valign_buffer;
5391             foreach my $i ( 0 .. $max_valign_buffer - 1 ) {
5392                 my ( $line, $leading_space_count, $level, $Kend ) =
5393                   @{ $valign_buffer[$i] };
5394                 my $ws = substr( $line, 0, $diff );
5395                 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
5396                     $line = substr( $line, $diff );
5397                 }
5398                 if ( $leading_space_count >= $diff ) {
5399                     $leading_space_count -= $diff;
5400                     $level =
5401                       $self->level_change( $leading_space_count, $diff,
5402                         $level );
5403                 }
5404                 $valign_buffer[$i] =
5405                   [ $line, $leading_space_count, $level, $Kend ];
5406             }
5407         }
5408         return;
5409     } ## end sub reduce_valign_buffer_indentation
5410
5411     sub valign_output_step_C {
5412
5413         #-----------------------------------------------------------------------
5414         # This is Step C in writing vertically aligned lines.
5415         # Lines are either stored in a buffer or passed along to the next step.
5416         # The reason for storing lines is that we may later want to reduce their
5417         # indentation when -sot and -sct are both used.
5418         #-----------------------------------------------------------------------
5419         my (
5420             $self,
5421             $seqno_string,
5422             $last_nonblank_seqno_string,
5423
5424             @args_to_D,
5425         ) = @_;
5426
5427         # Dump any saved lines if we see a line with an unbalanced opening or
5428         # closing token.
5429         $self->dump_valign_buffer()
5430           if ( $seqno_string && $valign_buffer_filling );
5431
5432         # Either store or write this line
5433         if ($valign_buffer_filling) {
5434             push @valign_buffer, [@args_to_D];
5435         }
5436         else {
5437             $self->valign_output_step_D(@args_to_D);
5438         }
5439
5440         # For lines starting or ending with opening or closing tokens..
5441         if ($seqno_string) {
5442             $last_nonblank_seqno_string = $seqno_string;
5443             set_last_nonblank_seqno_string($seqno_string);
5444
5445             # Start storing lines when we see a line with multiple stacked
5446             # opening tokens.
5447             # patch for RT #94354, requested by Colin Williams
5448             if (   index( $seqno_string, ':' ) >= 0
5449                 && $seqno_string =~ /^\d+(\:+\d+)+$/
5450                 && $args_to_D[0] !~ /^[\}\)\]\:\?]/ )
5451             {
5452
5453                 # This test is efficient but a little subtle: The first test
5454                 # says that we have multiple sequence numbers and hence
5455                 # multiple opening or closing tokens in this line.  The second
5456                 # part of the test rejects stacked closing and ternary tokens.
5457                 # So if we get here then we should have stacked unbalanced
5458                 # opening tokens.
5459
5460                 # Here is a complex example:
5461
5462                 # Foo($Bar[0], {  # (side comment)
5463                 #     baz => 1,
5464                 # });
5465
5466                 # The first line has sequence 6::4.  It does not begin with
5467                 # a closing token or ternary, so it passes the test and must be
5468                 # stacked opening tokens.
5469
5470                 # The last line has sequence 4:6 but is a stack of closing
5471                 # tokens, so it gets rejected.
5472
5473                 # Note that the sequence number of an opening token for a qw
5474                 # quote is a negative number and will be rejected.  For
5475                 # example, for the following line: skip_symbols([qw(
5476                 # $seqno_string='10:5:-1'.  It would be okay to accept it but I
5477                 # decided not to do this after testing.
5478
5479                 $valign_buffer_filling = $seqno_string;
5480
5481             }
5482         }
5483         return;
5484     } ## end sub valign_output_step_C
5485 }
5486
5487 ###############################
5488 # CODE SECTION 9: Output Step D
5489 ###############################
5490
5491 sub valign_output_step_D {
5492
5493     #----------------------------------------------------------------
5494     # This is Step D in writing vertically aligned lines.
5495     # It is the end of the vertical alignment pipeline.
5496     # Write one vertically aligned line of code to the output object.
5497     #----------------------------------------------------------------
5498
5499     my ( $self, $line, $leading_space_count, $level, $Kend ) = @_;
5500
5501     # The line is currently correct if there is no tabbing (recommended!)
5502     # We may have to lop off some leading spaces and replace with tabs.
5503     if ( $leading_space_count > 0 ) {
5504
5505         my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
5506         my $rOpts_tabs           = $self->[_rOpts_tabs_];
5507         my $rOpts_entab_leading_whitespace =
5508           $self->[_rOpts_entab_leading_whitespace_];
5509
5510         # Nothing to do if no tabs
5511         if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
5512             || $rOpts_indent_columns <= 0 )
5513         {
5514
5515             # nothing to do
5516         }
5517
5518         # Handle entab option
5519         elsif ($rOpts_entab_leading_whitespace) {
5520
5521             # Patch 12-nov-2018 based on report from Glenn. Extra padding was
5522             # not correctly entabbed, nor were side comments: Increase leading
5523             # space count for a padded line to get correct tabbing
5524             if ( $line =~ /^(\s+)(.*)$/ ) {
5525                 my $spaces = length($1);
5526                 if ( $spaces > $leading_space_count ) {
5527                     $leading_space_count = $spaces;
5528                 }
5529             }
5530
5531             my $space_count =
5532               $leading_space_count % $rOpts_entab_leading_whitespace;
5533             my $tab_count =
5534               int( $leading_space_count / $rOpts_entab_leading_whitespace );
5535             my $leading_string = "\t" x $tab_count . SPACE x $space_count;
5536             if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
5537                 substr( $line, 0, $leading_space_count ) = $leading_string;
5538             }
5539             else {
5540
5541                 # shouldn't happen - program error counting whitespace
5542                 # - skip entabbing
5543                 DEBUG_TABS
5544                   && warning(
5545 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
5546                   );
5547             }
5548         }
5549
5550         # Handle option of one tab per level
5551         else {
5552             my $leading_string = ( "\t" x $level );
5553             my $space_count =
5554               $leading_space_count - $level * $rOpts_indent_columns;
5555
5556             # shouldn't happen:
5557             if ( $space_count < 0 ) {
5558
5559                 # But it could be an outdented comment
5560                 if ( $line !~ /^\s*#/ ) {
5561                     DEBUG_TABS
5562                       && warning(
5563 "Error entabbing in valign_output_step_D: for level=$level count=$leading_space_count\n"
5564                       );
5565                 }
5566                 $leading_string = ( SPACE x $leading_space_count );
5567             }
5568             else {
5569                 $leading_string .= ( SPACE x $space_count );
5570             }
5571             if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
5572                 substr( $line, 0, $leading_space_count ) = $leading_string;
5573             }
5574             else {
5575
5576                 # shouldn't happen - program error counting whitespace
5577                 # we'll skip entabbing
5578                 DEBUG_TABS
5579                   && warning(
5580 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
5581                   );
5582             }
5583         }
5584     }
5585     my $file_writer_object = $self->[_file_writer_object_];
5586     $file_writer_object->write_code_line( $line . "\n", $Kend );
5587
5588     return;
5589 } ## end sub valign_output_step_D
5590
5591 {    ## closure for sub get_leading_string
5592
5593     my @leading_string_cache;
5594
5595     sub initialize_leading_string_cache {
5596         @leading_string_cache = ();
5597         return;
5598     }
5599
5600     sub get_leading_string {
5601
5602         # define the leading whitespace string for this line..
5603         my ( $self, $leading_whitespace_count, $group_level ) = @_;
5604
5605         # Handle case of zero whitespace, which includes multi-line quotes
5606         # (which may have a finite level; this prevents tab problems)
5607         if ( $leading_whitespace_count <= 0 ) {
5608             return EMPTY_STRING;
5609         }
5610
5611         # look for previous result
5612         elsif ( $leading_string_cache[$leading_whitespace_count] ) {
5613             return $leading_string_cache[$leading_whitespace_count];
5614         }
5615
5616         # must compute a string for this number of spaces
5617         my $leading_string;
5618
5619         # Handle simple case of no tabs
5620         my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
5621         my $rOpts_tabs           = $self->[_rOpts_tabs_];
5622         my $rOpts_entab_leading_whitespace =
5623           $self->[_rOpts_entab_leading_whitespace_];
5624
5625         if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
5626             || $rOpts_indent_columns <= 0 )
5627         {
5628             $leading_string = ( SPACE x $leading_whitespace_count );
5629         }
5630
5631         # Handle entab option
5632         elsif ($rOpts_entab_leading_whitespace) {
5633             my $space_count =
5634               $leading_whitespace_count % $rOpts_entab_leading_whitespace;
5635             my $tab_count = int(
5636                 $leading_whitespace_count / $rOpts_entab_leading_whitespace );
5637             $leading_string = "\t" x $tab_count . SPACE x $space_count;
5638         }
5639
5640         # Handle option of one tab per level
5641         else {
5642             $leading_string = ( "\t" x $group_level );
5643             my $space_count =
5644               $leading_whitespace_count - $group_level * $rOpts_indent_columns;
5645
5646             # shouldn't happen:
5647             if ( $space_count < 0 ) {
5648                 DEBUG_TABS
5649                   && warning(
5650 "Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n"
5651                   );
5652
5653                 # -- skip entabbing
5654                 $leading_string = ( SPACE x $leading_whitespace_count );
5655             }
5656             else {
5657                 $leading_string .= ( SPACE x $space_count );
5658             }
5659         }
5660         $leading_string_cache[$leading_whitespace_count] = $leading_string;
5661         return $leading_string;
5662     } ## end sub get_leading_string
5663 } ## end get_leading_string
5664
5665 ##########################
5666 # CODE SECTION 10: Summary
5667 ##########################
5668
5669 sub report_anything_unusual {
5670     my $self = shift;
5671
5672     my $outdented_line_count = $self->[_outdented_line_count_];
5673     if ( $outdented_line_count > 0 ) {
5674         write_logfile_entry(
5675             "$outdented_line_count long lines were outdented:\n");
5676         my $first_outdented_line_at = $self->[_first_outdented_line_at_];
5677         write_logfile_entry(
5678             "  First at output line $first_outdented_line_at\n");
5679
5680         if ( $outdented_line_count > 1 ) {
5681             my $last_outdented_line_at = $self->[_last_outdented_line_at_];
5682             write_logfile_entry(
5683                 "   Last at output line $last_outdented_line_at\n");
5684         }
5685         write_logfile_entry(
5686             "  use -noll to prevent outdenting, -l=n to increase line length\n"
5687         );
5688         write_logfile_entry("\n");
5689     }
5690     return;
5691 } ## end sub report_anything_unusual
5692 1;