lib/Perl/Tidy/VerticalAligner.pm

   1 package Perl::Tidy::VerticalAligner;
   2 use strict;
   3 use warnings;
   4 use Carp;
   5 our $VERSION = '20220217';
   6 use Perl::Tidy::VerticalAligner::Alignment;
   7 use Perl::Tidy::VerticalAligner::Line;
   8
   9 use constant DEVEL_MODE => 0;
  10
  11 # The Perl::Tidy::VerticalAligner package collects output lines and
  12 # attempts to line up certain common tokens, such as => and #, which are
  13 # identified by the calling routine.
  14 #
  15 # Usage:
  16 #   - Initiate an object with a call to new().
  17 #   - Write lines one-by-one with calls to valign_input().
  18 #   - Make a final call to flush() to empty the pipeline.
  19 #
  20 # The sub valign_input collects lines into groups.  When a group reaches
  21 # the maximum possible size it is processed for alignment and output.
  22 # The maximum group size is reached whenerver there is a change in indentation
  23 # level, a blank line, a block comment, or an external flush call.  The calling
  24 # routine may also force a break in alignment at any time.
  25 #
  26 # If the calling routine needs to interrupt the output and send other text to
  27 # the output, it must first call flush() to empty the output pipeline.  This
  28 # might occur for example if a block of pod text needs to be sent to the output
  29 # between blocks of code.
  30
  31 # It is essential that a final call to flush() be made. Otherwise some
  32 # final lines of text will be lost.
  33
  34 # Index...
  35 # CODE SECTION 1: Preliminary code, global definitions and sub new
  36 #                 sub new
  37 # CODE SECTION 2: Some Basic Utilities
  38 # CODE SECTION 3: Code to accept input and form groups
  39 #                 sub valign_input
  40 # CODE SECTION 4: Code to process comment lines
  41 #                 sub _flush_comment_lines
  42 # CODE SECTION 5: Code to process groups of code lines
  43 #                 sub _flush_group_lines
  44 # CODE SECTION 6: Output Step A
  45 #                 sub valign_output_step_A
  46 # CODE SECTION 7: Output Step B
  47 #                 sub valign_output_step_B
  48 # CODE SECTION 8: Output Step C
  49 #                 sub valign_output_step_C
  50 # CODE SECTION 9: Output Step D
  51 #                 sub valign_output_step_D
  52 # CODE SECTION 10: Summary
  53 #                 sub report_anything_unusual
  54
  55 ##################################################################
  56 # CODE SECTION 1: Preliminary code, global definitions and sub new
  57 ##################################################################
  58
  59 sub AUTOLOAD {
  60
  61     # Catch any undefined sub calls so that we are sure to get
  62     # some diagnostic information.  This sub should never be called
  63     # except for a programming error.
  64     our $AUTOLOAD;
  65     return if ( $AUTOLOAD =~ /\bDESTROY$/ );
  66     my ( $pkg, $fname, $lno ) = caller();
  67     my $my_package = __PACKAGE__;
  68     print STDERR <<EOM;
  69 ======================================================================
  70 Error detected in package '$my_package', version $VERSION
  71 Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
  72 Called from package: '$pkg'
  73 Called from File '$fname'  at line '$lno'
  74 This error is probably due to a recent programming change
  75 ======================================================================
  76 EOM
  77     exit 1;
  78 }
  79
  80 sub DESTROY {
  81
  82     # required to avoid call to AUTOLOAD in some versions of perl
  83 }
  84
  85 sub Die {
  86     my ($msg) = @_;
  87     Perl::Tidy::Die($msg);
  88     croak "unexpected return from Perl::Tidy::Die";
  89 }
  90
  91 sub Fault {
  92     my ($msg) = @_;
  93
  94     # This routine is called for errors that really should not occur
  95     # except if there has been a bug introduced by a recent program change.
  96     # Please add comments at calls to Fault to explain why the call
  97     # should not occur, and where to look to fix it.
  98     my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
  99     my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
 100     my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
 101     my $input_stream_name = get_input_stream_name();
 102
 103     Die(<<EOM);
 104 ==============================================================================
 105 While operating on input stream with name: '$input_stream_name'
 106 A fault was detected at line $line0 of sub '$subroutine1'
 107 in file '$filename1'
 108 which was called from line $line1 of sub '$subroutine2'
 109 Message: '$msg'
 110 This is probably an error introduced by a recent programming change.
 111 Perl::Tidy::VerticalAligner.pm reports VERSION='$VERSION'.
 112 ==============================================================================
 113 EOM
 114
 115     # We shouldn't get here, but this return is to keep Perl-Critic from
 116     # complaining.
 117     return;
 118 }
 119
 120 BEGIN {
 121
 122     # Define the fixed indexes for variables in $self, which is an array
 123     # reference.  Note the convention of leading and trailing underscores to
 124     # keep them unique.
 125     # Do not combine with other BEGIN blocks (c101).
 126     my $i = 0;
 127     use constant {
 128         _file_writer_object_ => $i++,
 129         _logger_object_      => $i++,
 130         _diagnostics_object_ => $i++,
 131         _length_function_    => $i++,
 132
 133         _rOpts_                             => $i++,
 134         _rOpts_indent_columns_              => $i++,
 135         _rOpts_tabs_                        => $i++,
 136         _rOpts_entab_leading_whitespace_    => $i++,
 137         _rOpts_fixed_position_side_comment_ => $i++,
 138         _rOpts_minimum_space_to_comment_    => $i++,
 139         _rOpts_valign_code_                 => $i++,
 140         _rOpts_valign_block_comments_       => $i++,
 141         _rOpts_valign_side_comments_        => $i++,
 142
 143         _last_level_written_            => $i++,
 144         _last_side_comment_column_      => $i++,
 145         _last_side_comment_line_number_ => $i++,
 146         _last_side_comment_length_      => $i++,
 147         _last_side_comment_level_       => $i++,
 148         _outdented_line_count_          => $i++,
 149         _first_outdented_line_at_       => $i++,
 150         _last_outdented_line_at_        => $i++,
 151         _consecutive_block_comments_    => $i++,
 152
 153         _rgroup_lines_                => $i++,
 154         _group_level_                 => $i++,
 155         _group_type_                  => $i++,
 156         _group_maximum_line_length_   => $i++,
 157         _zero_count_                  => $i++,
 158         _last_leading_space_count_    => $i++,
 159         _comment_leading_space_count_ => $i++,
 160     };
 161
 162     # Debug flag. This is a relic from the original program development
 163     # looking for problems with tab characters.  Caution: this debug flag can
 164     # produce a lot of output It should be 0 except when debugging small
 165     # scripts.
 166
 167     use constant DEBUG_TABS => 0;
 168
 169     my $debug_warning = sub {
 170         print STDOUT "VALIGN_DEBUGGING with key $_[0]\n";
 171         return;
 172     };
 173
 174     DEBUG_TABS && $debug_warning->('TABS');
 175 }
 176
 177 # GLOBAL variables
 178 my (
 179
 180     %valign_control_hash,
 181     $valign_control_default,
 182
 183 );
 184
 185 sub check_options {
 186
 187     # This routine is called to check the user-supplied run parameters
 188     # and to configure the control hashes to them.
 189     my ($rOpts) = @_;
 190
 191     # All alignments are done by default
 192     %valign_control_hash    = ();
 193     $valign_control_default = 1;
 194
 195     # If -vil=s is entered without -vxl, assume -vxl='*'
 196     if (  !$rOpts->{'valign-exclusion-list'}
 197         && $rOpts->{'valign-inclusion-list'} )
 198     {
 199         $rOpts->{'valign-exclusion-list'} = '*';
 200     }
 201
 202     # See if the user wants to exclude any alignment types ...
 203     if ( $rOpts->{'valign-exclusion-list'} ) {
 204
 205         # The inclusion list is only relevant if there is an exclusion list
 206         if ( $rOpts->{'valign-inclusion-list'} ) {
 207             my @vil = split /\s+/, $rOpts->{'valign-inclusion-list'};
 208             @valign_control_hash{@vil} = (1) x scalar(@vil);
 209         }
 210
 211         # Note that the -vxl list is done after -vil, so -vxl has priority
 212         # in the event of duplicate entries.
 213         my @vxl = split /\s+/, $rOpts->{'valign-exclusion-list'};
 214         @valign_control_hash{@vxl} = (0) x scalar(@vxl);
 215
 216         # Optimization: revert to defaults if no exclusions.
 217         # This could happen with -vxl='  ' and any -vil list
 218         if ( !@vxl ) {
 219             %valign_control_hash = ();
 220         }
 221
 222         # '$valign_control_default' applies to types not in the hash:
 223         # - If a '*' was entered then set it to be that default type
 224         # - Otherwise, leave it set it to 1
 225         if ( defined( $valign_control_hash{'*'} ) ) {
 226             $valign_control_default = $valign_control_hash{'*'};
 227         }
 228
 229         # Side comments are controlled separately and must be removed
 230         # if given in a list.
 231         if (%valign_control_hash) {
 232             $valign_control_hash{'#'} = 1;
 233         }
 234     }
 235
 236     return;
 237 }
 238
 239 sub new {
 240
 241     my ( $class, @args ) = @_;
 242
 243     my %defaults = (
 244         rOpts              => undef,
 245         file_writer_object => undef,
 246         logger_object      => undef,
 247         diagnostics_object => undef,
 248         length_function    => sub { return length( $_[0] ) },
 249     );
 250     my %args = ( %defaults, @args );
 251
 252     # Initialize other caches and buffers
 253     initialize_step_B_cache();
 254     initialize_valign_buffer();
 255     initialize_leading_string_cache();
 256     initialize_decode();
 257     set_logger_object( $args{logger_object} );
 258
 259     # Initialize all variables in $self.
 260     # To add an item to $self, first define a new constant index in the BEGIN
 261     # section.
 262     my $self = [];
 263
 264     # objects
 265     $self->[_file_writer_object_] = $args{file_writer_object};
 266     $self->[_logger_object_]      = $args{logger_object};
 267     $self->[_diagnostics_object_] = $args{diagnostics_object};
 268     $self->[_length_function_]    = $args{length_function};
 269
 270     # shortcuts to user options
 271     my $rOpts = $args{rOpts};
 272
 273     $self->[_rOpts_]                = $rOpts;
 274     $self->[_rOpts_indent_columns_] = $rOpts->{'indent-columns'};
 275     $self->[_rOpts_tabs_]           = $rOpts->{'tabs'};
 276     $self->[_rOpts_entab_leading_whitespace_] =
 277       $rOpts->{'entab-leading-whitespace'};
 278     $self->[_rOpts_fixed_position_side_comment_] =
 279       $rOpts->{'fixed-position-side-comment'};
 280     $self->[_rOpts_minimum_space_to_comment_] =
 281       $rOpts->{'minimum-space-to-comment'};
 282     $self->[_rOpts_valign_code_]           = $rOpts->{'valign-code'};
 283     $self->[_rOpts_valign_block_comments_] = $rOpts->{'valign-block-comments'};
 284     $self->[_rOpts_valign_side_comments_]  = $rOpts->{'valign-side-comments'};
 285
 286     # Batch of lines being collected
 287     $self->[_rgroup_lines_]                = [];
 288     $self->[_group_level_]                 = 0;
 289     $self->[_group_type_]                  = "";
 290     $self->[_group_maximum_line_length_]   = undef;
 291     $self->[_zero_count_]                  = 0;
 292     $self->[_comment_leading_space_count_] = 0;
 293     $self->[_last_leading_space_count_]    = 0;
 294
 295     # Memory of what has been processed
 296     $self->[_last_level_written_]            = -1;
 297     $self->[_last_side_comment_column_]      = 0;
 298     $self->[_last_side_comment_line_number_] = 0;
 299     $self->[_last_side_comment_length_]      = 0;
 300     $self->[_last_side_comment_level_]       = -1;
 301     $self->[_outdented_line_count_]          = 0;
 302     $self->[_first_outdented_line_at_]       = 0;
 303     $self->[_last_outdented_line_at_]        = 0;
 304     $self->[_consecutive_block_comments_]    = 0;
 305
 306     bless $self, $class;
 307     return $self;
 308 }
 309
 310 #################################
 311 # CODE SECTION 2: Basic Utilities
 312 #################################
 313
 314 sub flush {
 315
 316     # flush() is the external call to completely empty the pipeline.
 317     my ($self) = @_;
 318
 319     # push things out the pipline...
 320
 321     # push out any current group lines
 322     $self->_flush_group_lines();
 323
 324     # then anything left in the cache of step_B
 325     $self->_flush_cache();
 326
 327     # then anything left in the buffer of step_C
 328     $self->dump_valign_buffer();
 329
 330     return;
 331 }
 332
 333 sub initialize_for_new_group {
 334     my ($self) = @_;
 335
 336     $self->[_rgroup_lines_]                = [];
 337     $self->[_group_type_]                  = "";
 338     $self->[_zero_count_]                  = 0;
 339     $self->[_comment_leading_space_count_] = 0;
 340     $self->[_last_leading_space_count_]    = 0;
 341     $self->[_group_maximum_line_length_]   = undef;
 342
 343     # Note that the value for _group_level_ is
 344     # handled separately in sub valign_input
 345     return;
 346 }
 347
 348 sub group_line_count {
 349     return +@{ $_[0]->[_rgroup_lines_] };
 350 }
 351
 352 # interface to Perl::Tidy::Diagnostics routines
 353 # For debugging; not currently used
 354 sub write_diagnostics {
 355     my ( $self, $msg ) = @_;
 356     my $diagnostics_object = $self->[_diagnostics_object_];
 357     if ($diagnostics_object) {
 358         $diagnostics_object->write_diagnostics($msg);
 359     }
 360     return;
 361 }
 362
 363 {    ## begin closure for logger routines
 364     my $logger_object;
 365
 366     # Called once per file to initialize the logger object
 367     sub set_logger_object {
 368         $logger_object = shift;
 369         return;
 370     }
 371
 372     sub get_logger_object {
 373         return $logger_object;
 374     }
 375
 376     sub get_input_stream_name {
 377         my $input_stream_name = "";
 378         if ($logger_object) {
 379             $input_stream_name = $logger_object->get_input_stream_name();
 380         }
 381         return $input_stream_name;
 382     }
 383
 384     sub warning {
 385         my ($msg) = @_;
 386         if ($logger_object) {
 387             $logger_object->warning($msg);
 388         }
 389         return;
 390     }
 391
 392     sub write_logfile_entry {
 393         my ($msg) = @_;
 394         if ($logger_object) {
 395             $logger_object->write_logfile_entry($msg);
 396         }
 397         return;
 398     }
 399 }
 400
 401 sub get_cached_line_count {
 402     my $self = shift;
 403     return $self->group_line_count() + ( get_cached_line_type() ? 1 : 0 );
 404 }
 405
 406 sub get_recoverable_spaces {
 407
 408     # return the number of spaces (+ means shift right, - means shift left)
 409     # that we would like to shift a group of lines with the same indentation
 410     # to get them to line up with their opening parens
 411     my $indentation = shift;
 412     return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
 413 }
 414
 415 ######################################################
 416 # CODE SECTION 3: Code to accept input and form groups
 417 ######################################################
 418
 419 sub push_group_line {
 420
 421     my ( $self, $new_line ) = @_;
 422     my $rgroup_lines = $self->[_rgroup_lines_];
 423     push @{$rgroup_lines}, $new_line;
 424     return;
 425 }
 426
 427 use constant DEBUG_VALIGN      => 0;
 428 use constant SC_LONG_LINE_DIFF => 12;
 429
 430 my %is_closing_token;
 431
 432 BEGIN {
 433     my @q = qw< } ) ] >;
 434     @is_closing_token{@q} = (1) x scalar(@q);
 435 }
 436
 437 sub valign_input {
 438
 439     # Place one line in the current vertical group.
 440     #
 441     # The key input parameters describing each line are:
 442     #     $level          = indentation level of this line
 443     #     $rfields        = ref to array of fields
 444     #     $rpatterns      = ref to array of patterns, one per field
 445     #     $rtokens        = ref to array of tokens starting fields 1,2,..
 446     #     $rfield_lengths = ref to array of field display widths
 447     #
 448     # Here is an example of what this package does.  In this example,
 449     # we are trying to line up both the '=>' and the '#'.
 450     #
 451     #         '18' => 'grave',    #   \`
 452     #         '19' => 'acute',    #   `'
 453     #         '20' => 'caron',    #   \v
 454     # <-tabs-><f1-><--field 2 ---><-f3->
 455     # |            |              |    |
 456     # |            |              |    |
 457     # col1        col2         col3 col4
 458     #
 459     # The calling routine has already broken the entire line into 3 fields as
 460     # indicated.  (So the work of identifying promising common tokens has
 461     # already been done).
 462     #
 463     # In this example, there will be 2 tokens being matched: '=>' and '#'.
 464     # They are the leading parts of fields 2 and 3, but we do need to know
 465     # what they are so that we can dump a group of lines when these tokens
 466     # change.
 467     #
 468     # The fields contain the actual characters of each field.  The patterns
 469     # are like the fields, but they contain mainly token types instead
 470     # of tokens, so they have fewer characters.  They are used to be
 471     # sure we are matching fields of similar type.
 472     #
 473     # In this example, there will be 4 column indexes being adjusted.  The
 474     # first one is always at zero.  The interior columns are at the start of
 475     # the matching tokens, and the last one tracks the maximum line length.
 476     #
 477     # Each time a new line comes in, it joins the current vertical
 478     # group if possible.  Otherwise it causes the current group to be flushed
 479     # and a new group is started.
 480     #
 481     # For each new group member, the column locations are increased, as
 482     # necessary, to make room for the new fields.  When the group is finally
 483     # output, these column numbers are used to compute the amount of spaces of
 484     # padding needed for each field.
 485     #
 486     # Programming note: the fields are assumed not to have any tab characters.
 487     # Tabs have been previously removed except for tabs in quoted strings and
 488     # side comments.  Tabs in these fields can mess up the column counting.
 489     # The log file warns the user if there are any such tabs.
 490
 491     my ( $self, $rline_hash ) = @_;
 492
 493     my $level                     = $rline_hash->{level};
 494     my $level_end                 = $rline_hash->{level_end};
 495     my $indentation               = $rline_hash->{indentation};
 496     my $list_seqno                = $rline_hash->{list_seqno};
 497     my $outdent_long_lines        = $rline_hash->{outdent_long_lines};
 498     my $is_terminal_ternary       = $rline_hash->{is_terminal_ternary};
 499     my $rvertical_tightness_flags = $rline_hash->{rvertical_tightness_flags};
 500     my $break_alignment_before    = $rline_hash->{break_alignment_before};
 501     my $break_alignment_after     = $rline_hash->{break_alignment_after};
 502     my $Kend                      = $rline_hash->{Kend};
 503     my $ci_level                  = $rline_hash->{ci_level};
 504     my $maximum_line_length       = $rline_hash->{maximum_line_length};
 505     my $forget_side_comment       = $rline_hash->{forget_side_comment};
 506     my $rline_alignment           = $rline_hash->{rline_alignment};
 507
 508     my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
 509       @{$rline_alignment};
 510
 511     # The index '$Kend' is a value which passed along with the line text to sub
 512     # 'write_code_line' for a convergence check.
 513
 514     # number of fields is $jmax
 515     # number of tokens between fields is $jmax-1
 516     my $jmax = @{$rfields} - 1;
 517
 518     my $leading_space_count =
 519       ref($indentation) ? $indentation->get_spaces() : $indentation;
 520
 521     # set outdented flag to be sure we either align within statements or
 522     # across statement boundaries, but not both.
 523     my $is_outdented =
 524       $self->[_last_leading_space_count_] > $leading_space_count;
 525     $self->[_last_leading_space_count_] = $leading_space_count;
 526
 527     # Identify a hanging side comment.  Hanging side comments have an empty
 528     # initial field.
 529     my $is_hanging_side_comment =
 530       ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
 531
 532     # Undo outdented flag for a hanging side comment
 533     $is_outdented = 0 if $is_hanging_side_comment;
 534
 535     # Identify a block comment.
 536     my $is_block_comment = $jmax == 0 && substr( $rfields->[0], 0, 1 ) eq '#';
 537
 538     # Block comment .. update count
 539     if ($is_block_comment) {
 540         $self->[_consecutive_block_comments_]++;
 541     }
 542
 543     # Not a block comment ..
 544     # Forget side comment column if we saw 2 or more block comments,
 545     # and reset the count
 546     else {
 547
 548         if ( $self->[_consecutive_block_comments_] > 1 ) {
 549             $self->forget_side_comment();
 550         }
 551         $self->[_consecutive_block_comments_] = 0;
 552     }
 553
 554     # Reset side comment location if we are entering a new block from level 0.
 555     # This is intended to keep them from drifting too far to the right.
 556     if ($forget_side_comment) {
 557         $self->forget_side_comment();
 558     }
 559
 560     my $is_balanced_line = $level_end == $level;
 561
 562     my $group_level               = $self->[_group_level_];
 563     my $group_maximum_line_length = $self->[_group_maximum_line_length_];
 564
 565     DEBUG_VALIGN && do {
 566         my $nlines = $self->group_line_count();
 567         print STDOUT
 568 "Entering valign_input: lines=$nlines new #fields= $jmax, leading_count=$leading_space_count, level=$level, group_level=$group_level, level_end=$level_end\n";
 569     };
 570
 571     # Validate cached line if necessary: If we can produce a container
 572     # with just 2 lines total by combining an existing cached opening
 573     # token with the closing token to follow, then we will mark both
 574     # cached flags as valid.
 575     my $cached_line_type = get_cached_line_type();
 576     if ($cached_line_type) {
 577         my $cached_line_opening_flag = get_cached_line_opening_flag();
 578         if ($rvertical_tightness_flags) {
 579             my $cached_seqno = get_cached_seqno();
 580             if (   $cached_seqno
 581                 && $rvertical_tightness_flags->{_vt_seqno}
 582                 && $rvertical_tightness_flags->{_vt_seqno} == $cached_seqno )
 583             {
 584
 585                 # Fix for b1187 and b1188: Normally this step is only done
 586                 # if the number of existing lines is 0 or 1.  But to prevent
 587                 # blinking, this range can be controlled by the caller.
 588                 # If zero values are given we fall back on the range 0 to 1.
 589                 my $line_count = $self->group_line_count();
 590                 my $min_lines  = $rvertical_tightness_flags->{_vt_min_lines};
 591                 my $max_lines  = $rvertical_tightness_flags->{_vt_max_lines};
 592                 $min_lines = 0 unless ($min_lines);
 593                 $max_lines = 1 unless ($max_lines);
 594                 if (   ( $line_count >= $min_lines )
 595                     && ( $line_count <= $max_lines ) )
 596                 {
 597                     $rvertical_tightness_flags->{_vt_valid_flag} ||= 1;
 598                     set_cached_line_valid(1);
 599                 }
 600             }
 601         }
 602
 603         # do not join an opening block brace with an unbalanced line
 604         # unless requested with a flag value of 2
 605         if (   $cached_line_type == 3
 606             && !$self->group_line_count()
 607             && $cached_line_opening_flag < 2
 608             && !$is_balanced_line )
 609         {
 610             set_cached_line_valid(0);
 611         }
 612     }
 613
 614     # shouldn't happen:
 615     if ( $level < 0 ) { $level = 0 }
 616
 617     # do not align code across indentation level changes
 618     # or changes in the maximum line length
 619     # or if vertical alignment is turned off
 620     if (
 621         $level != $group_level
 622         || (   $group_maximum_line_length
 623             && $maximum_line_length != $group_maximum_line_length )
 624         || $is_outdented
 625         || ( $is_block_comment && !$self->[_rOpts_valign_block_comments_] )
 626         || (   !$is_block_comment
 627             && !$self->[_rOpts_valign_side_comments_]
 628             && !$self->[_rOpts_valign_code_] )
 629       )
 630     {
 631
 632         $self->_flush_group_lines( $level - $group_level );
 633
 634         $group_level                         = $level;
 635         $self->[_group_level_]               = $group_level;
 636         $self->[_group_maximum_line_length_] = $maximum_line_length;
 637
 638         # Update leading spaces after the above flush because the leading space
 639         # count may have been changed if the -icp flag is in effect
 640         $leading_space_count =
 641           ref($indentation) ? $indentation->get_spaces() : $indentation;
 642     }
 643
 644     # --------------------------------------------------------------------
 645     # Collect outdentable block COMMENTS
 646     # --------------------------------------------------------------------
 647     my $is_blank_line = "";
 648     if ( $self->[_group_type_] eq 'COMMENT' ) {
 649         if (
 650             (
 651                    $is_block_comment
 652                 && $outdent_long_lines
 653                 && $leading_space_count ==
 654                 $self->[_comment_leading_space_count_]
 655             )
 656             || $is_blank_line
 657           )
 658         {
 659
 660             # Note that for a comment group we are not storing a line
 661             # but rather just the text and its length.
 662             $self->push_group_line(
 663                 [ $rfields->[0], $rfield_lengths->[0], $Kend ] );
 664             return;
 665         }
 666         else {
 667             $self->_flush_group_lines();
 668         }
 669     }
 670
 671     my $rgroup_lines = $self->[_rgroup_lines_];
 672     if ( $break_alignment_before && @{$rgroup_lines} ) {
 673         $rgroup_lines->[-1]->set_end_group(1);
 674     }
 675
 676     # --------------------------------------------------------------------
 677     # add dummy fields for terminal ternary
 678     # --------------------------------------------------------------------
 679     my $j_terminal_match;
 680
 681     if ( $is_terminal_ternary && @{$rgroup_lines} ) {
 682         $j_terminal_match =
 683           fix_terminal_ternary( $rgroup_lines->[-1], $rfields, $rtokens,
 684             $rpatterns, $rfield_lengths, $group_level, );
 685         $jmax = @{$rfields} - 1;
 686     }
 687
 688     # --------------------------------------------------------------------
 689     # add dummy fields for else statement
 690     # --------------------------------------------------------------------
 691
 692     # Note the trailing space after 'else' here. If there were no space between
 693     # the else and the next '{' then we would not be able to do vertical
 694     # alignment of the '{'.
 695     if (   $rfields->[0] eq 'else '
 696         && @{$rgroup_lines}
 697         && $is_balanced_line )
 698     {
 699
 700         $j_terminal_match =
 701           fix_terminal_else( $rgroup_lines->[-1], $rfields, $rtokens,
 702             $rpatterns, $rfield_lengths );
 703         $jmax = @{$rfields} - 1;
 704     }
 705
 706     # --------------------------------------------------------------------
 707     # Handle simple line of code with no fields to match.
 708     # --------------------------------------------------------------------
 709     if ( $jmax <= 0 ) {
 710         $self->[_zero_count_]++;
 711
 712         if ( @{$rgroup_lines}
 713             && !get_recoverable_spaces( $rgroup_lines->[0]->get_indentation() )
 714           )
 715         {
 716
 717             # flush the current group if it has some aligned columns..
 718             # or we haven't seen a comment lately
 719             if (   $rgroup_lines->[0]->get_jmax() > 1
 720                 || $self->[_zero_count_] > 3 )
 721             {
 722                 $self->_flush_group_lines();
 723             }
 724         }
 725
 726         # start new COMMENT group if this comment may be outdented
 727         if (   $is_block_comment
 728             && $outdent_long_lines
 729             && !$self->group_line_count() )
 730         {
 731             $self->[_group_type_]                  = 'COMMENT';
 732             $self->[_comment_leading_space_count_] = $leading_space_count;
 733             $self->[_group_maximum_line_length_]   = $maximum_line_length;
 734             $self->push_group_line(
 735                 [ $rfields->[0], $rfield_lengths->[0], $Kend ] );
 736             return;
 737         }
 738
 739         # just write this line directly if no current group, no side comment,
 740         # and no space recovery is needed.
 741         if (   !$self->group_line_count()
 742             && !get_recoverable_spaces($indentation) )
 743         {
 744
 745             $self->valign_output_step_B(
 746                 {
 747                     leading_space_count       => $leading_space_count,
 748                     line                      => $rfields->[0],
 749                     line_length               => $rfield_lengths->[0],
 750                     side_comment_length       => 0,
 751                     outdent_long_lines        => $outdent_long_lines,
 752                     rvertical_tightness_flags => $rvertical_tightness_flags,
 753                     level                     => $level,
 754                     level_end                 => $level_end,
 755                     Kend                      => $Kend,
 756                     maximum_line_length       => $maximum_line_length,
 757                 }
 758             );
 759             return;
 760         }
 761     }
 762     else {
 763         $self->[_zero_count_] = 0;
 764     }
 765
 766     # --------------------------------------------------------------------
 767     # It simplifies things to create a zero length side comment
 768     # if none exists.
 769     # --------------------------------------------------------------------
 770     if ( ( $jmax == 0 ) || ( $rtokens->[ $jmax - 1 ] ne '#' ) ) {
 771         $jmax += 1;
 772         $rtokens->[ $jmax - 1 ]  = '#';
 773         $rfields->[$jmax]        = '';
 774         $rfield_lengths->[$jmax] = 0;
 775         $rpatterns->[$jmax]      = '#';
 776     }
 777
 778     # --------------------------------------------------------------------
 779     # create an object to hold this line
 780     # --------------------------------------------------------------------
 781     my $new_line = Perl::Tidy::VerticalAligner::Line->new(
 782         {
 783             jmax                      => $jmax,
 784             rtokens                   => $rtokens,
 785             rfields                   => $rfields,
 786             rpatterns                 => $rpatterns,
 787             rfield_lengths            => $rfield_lengths,
 788             indentation               => $indentation,
 789             leading_space_count       => $leading_space_count,
 790             outdent_long_lines        => $outdent_long_lines,
 791             list_seqno                => $list_seqno,
 792             list_type                 => "",
 793             is_hanging_side_comment   => $is_hanging_side_comment,
 794             rvertical_tightness_flags => $rvertical_tightness_flags,
 795             is_terminal_ternary       => $is_terminal_ternary,
 796             j_terminal_match          => $j_terminal_match,
 797             end_group                 => $break_alignment_after,
 798             Kend                      => $Kend,
 799             ci_level                  => $ci_level,
 800             level                     => $level,
 801             level_end                 => $level_end,
 802             imax_pair                 => -1,
 803             maximum_line_length       => $maximum_line_length,
 804         }
 805     );
 806
 807     # --------------------------------------------------------------------
 808     # Decide if this is a simple list of items.
 809     # We use this to be less restrictive in deciding what to align.
 810     # --------------------------------------------------------------------
 811     decide_if_list($new_line) if ($list_seqno);
 812
 813     # --------------------------------------------------------------------
 814     # Append this line to the current group (or start new group)
 815     # --------------------------------------------------------------------
 816
 817     $self->push_group_line($new_line);
 818     $self->[_group_maximum_line_length_] = $maximum_line_length;
 819
 820     # output this group if it ends in a terminal else or ternary line
 821     if ( defined($j_terminal_match) ) {
 822         $self->_flush_group_lines();
 823     }
 824
 825     # Force break after jump to lower level
 826     elsif ($level_end < $level
 827         || $is_closing_token{ substr( $rfields->[0], 0, 1 ) } )
 828     {
 829         $self->_flush_group_lines(-1);
 830     }
 831
 832     # --------------------------------------------------------------------
 833     # Some old debugging stuff
 834     # --------------------------------------------------------------------
 835     DEBUG_VALIGN && do {
 836         print STDOUT "exiting valign_input fields:";
 837         dump_array( @{$rfields} );
 838         print STDOUT "exiting valign_input tokens:";
 839         dump_array( @{$rtokens} );
 840         print STDOUT "exiting valign_input patterns:";
 841         dump_array( @{$rpatterns} );
 842     };
 843
 844     return;
 845 }
 846
 847 sub join_hanging_comment {
 848
 849     # Add dummy fields to a hanging side comment to make it look
 850     # like the first line in its potential group.  This simplifies
 851     # the coding.
 852     my ( $new_line, $old_line ) = @_;
 853
 854     my $jmax = $new_line->get_jmax();
 855
 856     # must be 2 fields
 857     return 0 unless $jmax == 1;
 858     my $rtokens = $new_line->get_rtokens();
 859
 860     # the second field must be a comment
 861     return 0 unless $rtokens->[0] eq '#';
 862     my $rfields = $new_line->get_rfields();
 863
 864     # the first field must be empty
 865     return 0 unless $rfields->[0] =~ /^\s*$/;
 866
 867     # the current line must have fewer fields
 868     my $maximum_field_index = $old_line->get_jmax();
 869     return 0
 870       unless $maximum_field_index > $jmax;
 871
 872     # looks ok..
 873     my $rpatterns      = $new_line->get_rpatterns();
 874     my $rfield_lengths = $new_line->get_rfield_lengths();
 875
 876     $new_line->set_is_hanging_side_comment(1);
 877     $jmax = $maximum_field_index;
 878     $new_line->set_jmax($jmax);
 879     $rfields->[$jmax]         = $rfields->[1];
 880     $rfield_lengths->[$jmax]  = $rfield_lengths->[1];
 881     $rtokens->[ $jmax - 1 ]   = $rtokens->[0];
 882     $rpatterns->[ $jmax - 1 ] = $rpatterns->[0];
 883     foreach my $j ( 1 .. $jmax - 1 ) {
 884         $rfields->[$j]         = '';
 885         $rfield_lengths->[$j]  = 0;
 886         $rtokens->[ $j - 1 ]   = "";
 887         $rpatterns->[ $j - 1 ] = "";
 888     }
 889     return 1;
 890 }
 891
 892 {    ## closure for sub decide_if_list
 893
 894     my %is_comma_token;
 895
 896     BEGIN {
 897
 898         my @q = qw( => );
 899         push @q, ',';
 900         @is_comma_token{@q} = (1) x scalar(@q);
 901     }
 902
 903     sub decide_if_list {
 904
 905         my $line = shift;
 906
 907         # A list will be taken to be a line with a forced break in which all
 908         # of the field separators are commas or comma-arrows (except for the
 909         # trailing #)
 910
 911         my $rtokens    = $line->get_rtokens();
 912         my $test_token = $rtokens->[0];
 913         my ( $raw_tok, $lev, $tag, $tok_count ) =
 914           decode_alignment_token($test_token);
 915         if ( $is_comma_token{$raw_tok} ) {
 916             my $list_type = $test_token;
 917             my $jmax      = $line->get_jmax();
 918
 919             foreach ( 1 .. $jmax - 2 ) {
 920                 ( $raw_tok, $lev, $tag, $tok_count ) =
 921                   decode_alignment_token( $rtokens->[$_] );
 922                 if ( !$is_comma_token{$raw_tok} ) {
 923                     $list_type = "";
 924                     last;
 925                 }
 926             }
 927             $line->set_list_type($list_type);
 928         }
 929         return;
 930     }
 931 }
 932
 933 sub fix_terminal_ternary {
 934
 935     # Add empty fields as necessary to align a ternary term
 936     # like this:
 937     #
 938     #  my $leapyear =
 939     #      $year % 4   ? 0
 940     #    : $year % 100 ? 1
 941     #    : $year % 400 ? 0
 942     #    :               1;
 943     #
 944     # returns the index of the terminal question token, if any
 945
 946     my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths,
 947         $group_level )
 948       = @_;
 949
 950     return unless ($old_line);
 951     use constant EXPLAIN_TERNARY => 0;
 952
 953     if (%valign_control_hash) {
 954         my $align_ok = $valign_control_hash{'?'};
 955         $align_ok = $valign_control_default unless defined($align_ok);
 956         return unless ($align_ok);
 957     }
 958
 959     my $jmax        = @{$rfields} - 1;
 960     my $rfields_old = $old_line->get_rfields();
 961
 962     my $rpatterns_old       = $old_line->get_rpatterns();
 963     my $rtokens_old         = $old_line->get_rtokens();
 964     my $maximum_field_index = $old_line->get_jmax();
 965
 966     # look for the question mark after the :
 967     my ($jquestion);
 968     my $depth_question;
 969     my $pad        = "";
 970     my $pad_length = 0;
 971     foreach my $j ( 0 .. $maximum_field_index - 1 ) {
 972         my $tok = $rtokens_old->[$j];
 973         my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
 974         if ( $raw_tok eq '?' ) {
 975             $depth_question = $lev;
 976
 977             # depth must be correct
 978             next unless ( $depth_question eq $group_level );
 979
 980             $jquestion = $j;
 981             if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
 982                 $pad_length = length($1);
 983                 $pad        = " " x $pad_length;
 984             }
 985             else {
 986                 return;    # shouldn't happen
 987             }
 988             last;
 989         }
 990     }
 991     return unless ( defined($jquestion) );    # shouldn't happen
 992
 993     # Now splice the tokens and patterns of the previous line
 994     # into the else line to insure a match.  Add empty fields
 995     # as necessary.
 996     my $jadd = $jquestion;
 997
 998     # Work on copies of the actual arrays in case we have
 999     # to return due to an error
1000     my @fields        = @{$rfields};
1001     my @patterns      = @{$rpatterns};
1002     my @tokens        = @{$rtokens};
1003     my @field_lengths = @{$rfield_lengths};
1004
1005     EXPLAIN_TERNARY && do {
1006         local $" = '><';
1007         print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n";
1008         print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n";
1009         print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
1010         print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n";
1011         print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n";
1012         print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
1013     };
1014
1015     # handle cases of leading colon on this line
1016     if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
1017
1018         my ( $colon, $therest ) = ( $1, $2 );
1019
1020         # Handle sub-case of first field with leading colon plus additional code
1021         # This is the usual situation as at the '1' below:
1022         #  ...
1023         #  : $year % 400 ? 0
1024         #  :               1;
1025         if ($therest) {
1026
1027             # Split the first field after the leading colon and insert padding.
1028             # Note that this padding will remain even if the terminal value goes
1029             # out on a separate line.  This does not seem to look to bad, so no
1030             # mechanism has been included to undo it.
1031             my $field1        = shift @fields;
1032             my $field_length1 = shift @field_lengths;
1033             my $len_colon     = length($colon);
1034             unshift @fields, ( $colon, $pad . $therest );
1035             unshift @field_lengths,
1036               ( $len_colon, $pad_length + $field_length1 - $len_colon );
1037
1038             # change the leading pattern from : to ?
1039             return unless ( $patterns[0] =~ s/^\:/?/ );
1040
1041             # install leading tokens and patterns of existing line
1042             unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
1043             unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
1044
1045             # insert appropriate number of empty fields
1046             splice( @fields,        1, 0, ('') x $jadd ) if $jadd;
1047             splice( @field_lengths, 1, 0, (0) x $jadd )  if $jadd;
1048         }
1049
1050         # handle sub-case of first field just equal to leading colon.
1051         # This can happen for example in the example below where
1052         # the leading '(' would create a new alignment token
1053         # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
1054         # :                        ( $mname = $name . '->' );
1055         else {
1056
1057             return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
1058
1059             # prepend a leading ? onto the second pattern
1060             $patterns[1] = "?b" . $patterns[1];
1061
1062             # pad the second field
1063             $fields[1]        = $pad . $fields[1];
1064             $field_lengths[1] = $pad_length + $field_lengths[1];
1065
1066             # install leading tokens and patterns of existing line, replacing
1067             # leading token and inserting appropriate number of empty fields
1068             splice( @tokens,   0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
1069             splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
1070             splice( @fields,        1, 0, ('') x $jadd ) if $jadd;
1071             splice( @field_lengths, 1, 0, (0) x $jadd )  if $jadd;
1072         }
1073     }
1074
1075     # Handle case of no leading colon on this line.  This will
1076     # be the case when -wba=':' is used.  For example,
1077     #  $year % 400 ? 0 :
1078     #                1;
1079     else {
1080
1081         # install leading tokens and patterns of existing line
1082         $patterns[0] = '?' . 'b' . $patterns[0];
1083         unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
1084         unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
1085
1086         # insert appropriate number of empty fields
1087         $jadd             = $jquestion + 1;
1088         $fields[0]        = $pad . $fields[0];
1089         $field_lengths[0] = $pad_length + $field_lengths[0];
1090         splice( @fields,        0, 0, ('') x $jadd ) if $jadd;
1091         splice( @field_lengths, 0, 0, (0) x $jadd )  if $jadd;
1092     }
1093
1094     EXPLAIN_TERNARY && do {
1095         local $" = '><';
1096         print STDOUT "MODIFIED TOKENS=<@tokens>\n";
1097         print STDOUT "MODIFIED PATTERNS=<@patterns>\n";
1098         print STDOUT "MODIFIED FIELDS=<@fields>\n";
1099     };
1100
1101     # all ok .. update the arrays
1102     @{$rfields}        = @fields;
1103     @{$rtokens}        = @tokens;
1104     @{$rpatterns}      = @patterns;
1105     @{$rfield_lengths} = @field_lengths;
1106
1107     # force a flush after this line
1108     return $jquestion;
1109 }
1110
1111 sub fix_terminal_else {
1112
1113     # Add empty fields as necessary to align a balanced terminal
1114     # else block to a previous if/elsif/unless block,
1115     # like this:
1116     #
1117     #  if   ( 1 || $x ) { print "ok 13\n"; }
1118     #  else             { print "not ok 13\n"; }
1119     #
1120     # returns a positive value if the else block should be indented
1121     #
1122     my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths ) = @_;
1123
1124     return unless ($old_line);
1125     my $jmax = @{$rfields} - 1;
1126     return unless ( $jmax > 0 );
1127
1128     if (%valign_control_hash) {
1129         my $align_ok = $valign_control_hash{'{'};
1130         $align_ok = $valign_control_default unless defined($align_ok);
1131         return unless ($align_ok);
1132     }
1133
1134     # check for balanced else block following if/elsif/unless
1135     my $rfields_old = $old_line->get_rfields();
1136
1137     # TBD: add handling for 'case'
1138     return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
1139
1140     # look for the opening brace after the else, and extract the depth
1141     my $tok_brace = $rtokens->[0];
1142     my $depth_brace;
1143     if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
1144
1145     # probably:  "else # side_comment"
1146     else { return }
1147
1148     my $rpatterns_old       = $old_line->get_rpatterns();
1149     my $rtokens_old         = $old_line->get_rtokens();
1150     my $maximum_field_index = $old_line->get_jmax();
1151
1152     # be sure the previous if/elsif is followed by an opening paren
1153     my $jparen    = 0;
1154     my $tok_paren = '(' . $depth_brace;
1155     my $tok_test  = $rtokens_old->[$jparen];
1156     return unless ( $tok_test eq $tok_paren );    # shouldn't happen
1157
1158     # Now find the opening block brace
1159     my ($jbrace);
1160     foreach my $j ( 1 .. $maximum_field_index - 1 ) {
1161         my $tok = $rtokens_old->[$j];
1162         if ( $tok eq $tok_brace ) {
1163             $jbrace = $j;
1164             last;
1165         }
1166     }
1167     return unless ( defined($jbrace) );           # shouldn't happen
1168
1169     # Now splice the tokens and patterns of the previous line
1170     # into the else line to insure a match.  Add empty fields
1171     # as necessary.
1172     my $jadd = $jbrace - $jparen;
1173     splice( @{$rtokens},   0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
1174     splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
1175     splice( @{$rfields},        1, 0, ('') x $jadd );
1176     splice( @{$rfield_lengths}, 1, 0, (0) x $jadd );
1177
1178     # force a flush after this line if it does not follow a case
1179     if   ( $rfields_old->[0] =~ /^case\s*$/ ) { return }
1180     else                                      { return $jbrace }
1181 }
1182
1183 my %is_closing_block_type;
1184
1185 BEGIN {
1186     @_ = qw< } ] >;
1187     @is_closing_block_type{@_} = (1) x scalar(@_);
1188 }
1189
1190 sub check_match {
1191
1192     # See if the current line matches the current vertical alignment group.
1193
1194     my ( $self, $new_line, $base_line, $prev_line ) = @_;
1195
1196     # Given:
1197     #  $new_line  = the line being considered for group inclusion
1198     #  $base_line = the first line of the current group
1199     #  $prev_line = the line just before $new_line
1200
1201     # returns a flag and a value as follows:
1202     #    return (0, $imax_align)     if the line does not match
1203     #    return (1, $imax_align)     if the line matches but does not fit
1204     #    return (2, $imax_align)     if the line matches and fits
1205
1206     # Returns '$imax_align' which is the index of the maximum matching token.
1207     # It will be used in the subsequent left-to-right sweep to align as many
1208     # tokens as possible for lines which partially match.
1209     my $imax_align = -1;
1210
1211     # variable $GoToMsg explains reason for no match, for debugging
1212     my $GoToMsg = "";
1213     use constant EXPLAIN_CHECK_MATCH => 0;
1214
1215     # This is a flag for testing alignment by sub sweep_left_to_right only.
1216     # This test can help find problems with the alignment logic.
1217     # This flag should normally be zero.
1218     use constant TEST_SWEEP_ONLY => 0;
1219
1220     my $jmax                = $new_line->get_jmax();
1221     my $maximum_field_index = $base_line->get_jmax();
1222
1223     my $jlimit = $jmax - 2;
1224     if ( $jmax > $maximum_field_index ) {
1225         $jlimit = $maximum_field_index - 2;
1226     }
1227
1228     if ( $new_line->get_is_hanging_side_comment() ) {
1229
1230         # HSC's can join the group if they fit
1231     }
1232
1233     # Everything else
1234     else {
1235
1236         # A group with hanging side comments ends with the first non hanging
1237         # side comment.
1238         if ( $base_line->get_is_hanging_side_comment() ) {
1239             $GoToMsg = "end of hanging side comments";
1240             goto NO_MATCH;
1241         }
1242
1243         # The number of tokens that this line shares with the previous line
1244         # has been stored with the previous line.  This value was calculated
1245         # and stored by sub 'match_line_pair'.
1246         $imax_align = $prev_line->get_imax_pair();
1247
1248         if ( $imax_align != $jlimit ) {
1249             $GoToMsg = "Not all tokens match: $imax_align != $jlimit\n";
1250             goto NO_MATCH;
1251         }
1252
1253     }
1254
1255     # The tokens match, but the lines must have identical number of
1256     # tokens to join the group.
1257     if ( $maximum_field_index != $jmax ) {
1258         $GoToMsg = "token count differs";
1259         goto NO_MATCH;
1260     }
1261
1262     # The tokens match. Now See if there is space for this line in the
1263     # current group.
1264     if ( $self->check_fit( $new_line, $base_line ) && !TEST_SWEEP_ONLY ) {
1265
1266         EXPLAIN_CHECK_MATCH
1267           && print "match and fit, imax_align=$imax_align, jmax=$jmax\n";
1268         return ( 2, $jlimit );
1269     }
1270     else {
1271
1272         EXPLAIN_CHECK_MATCH
1273           && print "match but no fit, imax_align=$imax_align, jmax=$jmax\n";
1274         return ( 1, $jlimit );
1275     }
1276
1277   NO_MATCH:
1278
1279     EXPLAIN_CHECK_MATCH
1280       && print
1281       "no match because $GoToMsg, max match index =i $imax_align, jmax=$jmax\n";
1282
1283     return ( 0, $imax_align );
1284 }
1285
1286 sub check_fit {
1287
1288     my ( $self, $new_line, $old_line ) = @_;
1289
1290     # The new line has alignments identical to the current group. Now we have
1291     # to fit the new line into the group without causing a field to exceed the
1292     # line length limit.
1293     #   return true if successful
1294     #   return false if not successful
1295
1296     my $jmax                = $new_line->get_jmax();
1297     my $leading_space_count = $new_line->get_leading_space_count();
1298     my $rfield_lengths      = $new_line->get_rfield_lengths();
1299     my $padding_available   = $old_line->get_available_space_on_right();
1300     my $jmax_old            = $old_line->get_jmax();
1301     my $rtokens_old         = $old_line->get_rtokens();
1302
1303     # Safety check ... only lines with equal array sizes should arrive here
1304     # from sub check_match.  So if this error occurs, look at recent changes in
1305     # sub check_match.  It is only supposed to check the fit of lines with
1306     # identical numbers of alignment tokens.
1307     if ( $jmax_old ne $jmax ) {
1308
1309         warning(<<EOM);
1310 Program bug detected in Perl::Tidy::VerticalAligner sub check_fit
1311 unexpected difference in array lengths: $jmax != $jmax_old
1312 EOM
1313         return;
1314     }
1315
1316     # Save current columns in case this line does not fit.
1317     my @alignments = $old_line->get_alignments();
1318     foreach my $alignment (@alignments) {
1319         $alignment->save_column();
1320     }
1321
1322     my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
1323
1324     # Loop over all alignments ...
1325     my $maximum_field_index = $old_line->get_jmax();
1326     for my $j ( 0 .. $jmax ) {
1327
1328         my $pad = $rfield_lengths->[$j] - $old_line->current_field_width($j);
1329
1330         if ( $j == 0 ) {
1331             $pad += $leading_space_count;
1332         }
1333
1334         # Keep going if this field does not need any space.
1335         next if ( $pad < 0 );
1336
1337         # Revert to the starting state if does not fit
1338         if ( $pad > $padding_available ) {
1339
1340             ################################################
1341             # Line does not fit -- revert to starting state
1342             ################################################
1343             foreach my $alignment (@alignments) {
1344                 $alignment->restore_column();
1345             }
1346             return;
1347         }
1348
1349         # make room for this field
1350         $old_line->increase_field_width( $j, $pad );
1351         $padding_available -= $pad;
1352     }
1353
1354     ######################################
1355     # The line fits, the match is accepted
1356     ######################################
1357     return 1;
1358
1359 }
1360
1361 sub install_new_alignments {
1362
1363     my ($new_line) = @_;
1364
1365     my $jmax           = $new_line->get_jmax();
1366     my $rfield_lengths = $new_line->get_rfield_lengths();
1367     my $col            = $new_line->get_leading_space_count();
1368
1369     for my $j ( 0 .. $jmax ) {
1370         $col += $rfield_lengths->[$j];
1371
1372         # create initial alignments for the new group
1373         my $alignment =
1374           Perl::Tidy::VerticalAligner::Alignment->new( { column => $col } );
1375         $new_line->set_alignment( $j, $alignment );
1376     }
1377     return;
1378 }
1379
1380 sub copy_old_alignments {
1381     my ( $new_line, $old_line ) = @_;
1382     my @new_alignments = $old_line->get_alignments();
1383     $new_line->set_alignments(@new_alignments);
1384     return;
1385 }
1386
1387 sub dump_array {
1388
1389     # debug routine to dump array contents
1390     local $" = ')(';
1391     print STDOUT "(@_)\n";
1392     return;
1393 }
1394
1395 sub level_change {
1396
1397     # compute decrease in level when we remove $diff spaces from the
1398     # leading spaces
1399     my ( $self, $leading_space_count, $diff, $level ) = @_;
1400
1401     my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
1402     if ($rOpts_indent_columns) {
1403         my $olev =
1404           int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
1405         my $nlev = int( $leading_space_count / $rOpts_indent_columns );
1406         $level -= ( $olev - $nlev );
1407         if ( $level < 0 ) { $level = 0 }
1408     }
1409     return $level;
1410 }
1411
1412 ###############################################
1413 # CODE SECTION 4: Code to process comment lines
1414 ###############################################
1415
1416 sub _flush_comment_lines {
1417
1418     # Output a group consisting of COMMENT lines
1419
1420     my ($self) = @_;
1421     my $rgroup_lines = $self->[_rgroup_lines_];
1422     return unless ( @{$rgroup_lines} );
1423     my $group_level               = $self->[_group_level_];
1424     my $group_maximum_line_length = $self->[_group_maximum_line_length_];
1425     my $leading_space_count       = $self->[_comment_leading_space_count_];
1426     my $leading_string =
1427       $self->get_leading_string( $leading_space_count, $group_level );
1428
1429     # look for excessively long lines
1430     my $max_excess = 0;
1431     foreach my $item ( @{$rgroup_lines} ) {
1432         my ( $str, $str_len ) = @{$item};
1433         my $excess =
1434           $str_len + $leading_space_count - $group_maximum_line_length;
1435         if ( $excess > $max_excess ) {
1436             $max_excess = $excess;
1437         }
1438     }
1439
1440     # zero leading space count if any lines are too long
1441     if ( $max_excess > 0 ) {
1442         $leading_space_count -= $max_excess;
1443         if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
1444         my $file_writer_object = $self->[_file_writer_object_];
1445         my $last_outdented_line_at =
1446           $file_writer_object->get_output_line_number();
1447         my $nlines = @{$rgroup_lines};
1448         $self->[_last_outdented_line_at_] =
1449           $last_outdented_line_at + $nlines - 1;
1450         my $outdented_line_count = $self->[_outdented_line_count_];
1451         unless ($outdented_line_count) {
1452             $self->[_first_outdented_line_at_] = $last_outdented_line_at;
1453         }
1454         $outdented_line_count += $nlines;
1455         $self->[_outdented_line_count_] = $outdented_line_count;
1456     }
1457
1458     # write the lines
1459     my $outdent_long_lines = 0;
1460
1461     foreach my $item ( @{$rgroup_lines} ) {
1462         my ( $str, $str_len, $Kend ) = @{$item};
1463         $self->valign_output_step_B(
1464             {
1465                 leading_space_count       => $leading_space_count,
1466                 line                      => $str,
1467                 line_length               => $str_len,
1468                 side_comment_length       => 0,
1469                 outdent_long_lines        => $outdent_long_lines,
1470                 rvertical_tightness_flags => undef,
1471                 level                     => $group_level,
1472                 level_end                 => $group_level,
1473                 Kend                      => $Kend,
1474                 maximum_line_length       => $group_maximum_line_length,
1475             }
1476         );
1477     }
1478
1479     $self->initialize_for_new_group();
1480     return;
1481 }
1482
1483 ######################################################
1484 # CODE SECTION 5: Code to process groups of code lines
1485 ######################################################
1486
1487 sub _flush_group_lines {
1488
1489     # This is the vertical aligner internal flush, which leaves the cache
1490     # intact
1491     my ( $self, $level_jump ) = @_;
1492
1493     # $level_jump = $next_level-$group_level, if known
1494     #             = undef if not known
1495     # Note: only the sign of the jump is needed
1496
1497     my $rgroup_lines = $self->[_rgroup_lines_];
1498     return unless ( @{$rgroup_lines} );
1499     my $group_type  = $self->[_group_type_];
1500     my $group_level = $self->[_group_level_];
1501
1502     # Debug
1503     0 && do {
1504         my ( $a, $b, $c ) = caller();
1505         my $nlines = @{$rgroup_lines};
1506         print STDOUT
1507 "APPEND0: _flush_group_lines called from $a $b $c lines=$nlines, type=$group_type \n";
1508     };
1509
1510     ############################################
1511     # Section 1: Handle a group of COMMENT lines
1512     ############################################
1513     if ( $group_type eq 'COMMENT' ) {
1514         $self->_flush_comment_lines();
1515         return;
1516     }
1517
1518     #########################################################################
1519     # Section 2: Handle line(s) of CODE.  Most of the actual work of vertical
1520     # aligning happens here in the following steps:
1521     #########################################################################
1522
1523     # STEP 1: Remove most unmatched tokens. They block good alignments.
1524     my ( $max_lev_diff, $saw_side_comment ) =
1525       delete_unmatched_tokens( $rgroup_lines, $group_level );
1526
1527     # STEP 2: Sweep top to bottom, forming subgroups of lines with exactly
1528     # matching common alignments.  The indexes of these subgroups are in the
1529     # return variable.
1530     my $rgroups = $self->sweep_top_down( $rgroup_lines, $group_level );
1531
1532     # STEP 3: Sweep left to right through the lines, looking for leading
1533     # alignment tokens shared by groups.
1534     sweep_left_to_right( $rgroup_lines, $rgroups, $group_level )
1535       if ( @{$rgroups} > 1 );
1536
1537     # STEP 4: Move side comments to a common column if possible.
1538     if ($saw_side_comment) {
1539         $self->align_side_comments( $rgroup_lines, $rgroups );
1540     }
1541
1542     # STEP 5: For the -lp option, increase the indentation of lists
1543     # to the desired amount, but do not exceed the line length limit.
1544
1545     # We are allowed to shift a group of lines to the right if:
1546     #  (1) its level is greater than the level of the previous group, and
1547     #  (2) its level is greater than the level of the next line to be written.
1548
1549     my $extra_indent_ok;
1550     if ( $group_level > $self->[_last_level_written_] ) {
1551
1552         # Use the level jump to next line to come, if given
1553         if ( defined($level_jump) ) {
1554             $extra_indent_ok = $level_jump < 0;
1555         }
1556
1557         # Otherwise, assume the next line has the level of the end of last line.
1558         # This fixes case c008.
1559         else {
1560             my $level_end = $rgroup_lines->[-1]->get_level_end();
1561             $extra_indent_ok = $group_level > $level_end;
1562         }
1563     }
1564
1565     my $extra_leading_spaces =
1566       $extra_indent_ok
1567       ? get_extra_leading_spaces( $rgroup_lines, $rgroups )
1568       : 0;
1569
1570     # STEP 6: Output the lines.
1571     # All lines in this group have the same leading spacing and maximum line
1572     # length
1573     my $group_leader_length = $rgroup_lines->[0]->get_leading_space_count();
1574     my $group_maximum_line_length =
1575       $rgroup_lines->[0]->get_maximum_line_length();
1576
1577     foreach my $line ( @{$rgroup_lines} ) {
1578         $self->valign_output_step_A(
1579             {
1580                 line                 => $line,
1581                 min_ci_gap           => 0,
1582                 do_not_align         => 0,
1583                 group_leader_length  => $group_leader_length,
1584                 extra_leading_spaces => $extra_leading_spaces,
1585                 level                => $group_level,
1586                 maximum_line_length  => $group_maximum_line_length,
1587             }
1588         );
1589     }
1590
1591     # Let the formatter know that this object has been processed and any
1592     # recoverable spaces have been handled.  This is needed for setting the
1593     # closing paren location in -lp mode.
1594     my $object = $rgroup_lines->[0]->get_indentation();
1595     if ( ref($object) ) { $object->set_recoverable_spaces(0) }
1596
1597     $self->initialize_for_new_group();
1598     return;
1599 }
1600
1601 {    ## closure for sub sweep_top_down
1602
1603     my $rall_lines;         # all of the lines
1604     my $grp_level;          # level of all lines
1605     my $rgroups;            # describes the partition of lines we will make here
1606     my $group_line_count;   # number of lines in current partition
1607
1608     BEGIN { $rgroups = [] }
1609
1610     sub initialize_for_new_rgroup {
1611         $group_line_count = 0;
1612         return;
1613     }
1614
1615     sub add_to_rgroup {
1616
1617         my ($jend) = @_;
1618         my $rline = $rall_lines->[$jend];
1619
1620         my $jbeg = $jend;
1621         if ( $group_line_count == 0 ) {
1622             install_new_alignments($rline);
1623         }
1624         else {
1625             my $rvals = pop @{$rgroups};
1626             $jbeg = $rvals->[0];
1627             copy_old_alignments( $rline, $rall_lines->[$jbeg] );
1628         }
1629         push @{$rgroups}, [ $jbeg, $jend, undef ];
1630         $group_line_count++;
1631         return;
1632     }
1633
1634     sub get_rgroup_jrange {
1635
1636         return unless @{$rgroups};
1637         return unless ( $group_line_count > 0 );
1638         my ( $jbeg, $jend ) = @{ $rgroups->[-1] };
1639         return ( $jbeg, $jend );
1640     }
1641
1642     sub end_rgroup {
1643
1644         my ($imax_align) = @_;
1645         return unless @{$rgroups};
1646         return unless ( $group_line_count > 0 );
1647
1648         my ( $jbeg, $jend ) = @{ pop @{$rgroups} };
1649         push @{$rgroups}, [ $jbeg, $jend, $imax_align ];
1650
1651         # Undo some alignments of poor two-line combinations.
1652         # We had to wait until now to know the line count.
1653         if ( $jend - $jbeg == 1 ) {
1654             my $line_0 = $rall_lines->[$jbeg];
1655             my $line_1 = $rall_lines->[$jend];
1656
1657             my $imax_pair = $line_1->get_imax_pair();
1658             if ( $imax_pair > $imax_align ) { $imax_align = $imax_pair }
1659
1660             ## flag for possible future use:
1661             ## my $is_isolated_pair = $imax_pair < 0
1662             ##  && ( $jbeg == 0
1663             ##    || $rall_lines->[ $jbeg - 1 ]->get_imax_pair() < 0 );
1664
1665             my $imax_prev =
1666               $jbeg > 0 ? $rall_lines->[ $jbeg - 1 ]->get_imax_pair() : -1;
1667
1668             my ( $is_marginal, $imax_align_fix ) =
1669               is_marginal_match( $line_0, $line_1, $grp_level, $imax_align,
1670                 $imax_prev );
1671             if ($is_marginal) {
1672                 combine_fields( $line_0, $line_1, $imax_align_fix );
1673             }
1674         }
1675
1676         initialize_for_new_rgroup();
1677         return;
1678     }
1679
1680     sub block_penultimate_match {
1681
1682         # emergency reset to prevent sweep_left_to_right from trying to match a
1683         # failed terminal else match
1684         return unless @{$rgroups} > 1;
1685         $rgroups->[-2]->[2] = -1;
1686         return;
1687     }
1688
1689     sub sweep_top_down {
1690         my ( $self, $rlines, $group_level ) = @_;
1691
1692         # Partition the set of lines into final alignment subgroups
1693         # and store the alignments with the lines.
1694
1695         # The alignment subgroups we are making here are groups of consecutive
1696         # lines which have (1) identical alignment tokens and (2) do not
1697         # exceed the allowable maximum line length.  A later sweep from
1698         # left-to-right ('sweep_lr') will handle additional alignments.
1699
1700         # transfer args to closure variables
1701         $rall_lines = $rlines;
1702         $grp_level  = $group_level;
1703         $rgroups    = [];
1704         initialize_for_new_rgroup();
1705         return unless @{$rlines};    # shouldn't happen
1706
1707         # Unset the _end_group flag for the last line if it it set because it
1708         # is not needed and can causes problems for -lp formatting
1709         $rall_lines->[-1]->set_end_group(0);
1710
1711         # Loop over all lines ...
1712         my $jline = -1;
1713         foreach my $new_line ( @{$rall_lines} ) {
1714             $jline++;
1715
1716             # Start a new subgroup if necessary
1717             if ( !$group_line_count ) {
1718                 add_to_rgroup($jline);
1719                 if ( $new_line->get_end_group() ) {
1720                     end_rgroup(-1);
1721                 }
1722                 next;
1723             }
1724
1725             my $j_terminal_match = $new_line->get_j_terminal_match();
1726             my ( $jbeg, $jend ) = get_rgroup_jrange();
1727             if ( !defined($jbeg) ) {
1728
1729                 # safety check, shouldn't happen
1730                 warning(<<EOM);
1731 Program bug detected in Perl::Tidy::VerticalAligner sub sweep_top_down
1732 undefined index for group line count $group_line_count
1733 EOM
1734                 $jbeg = $jline;
1735             }
1736             my $base_line = $rall_lines->[$jbeg];
1737
1738             # Initialize a global flag saying if the last line of the group
1739             # should match end of group and also terminate the group.  There
1740             # should be no returns between here and where the flag is handled
1741             # at the bottom.
1742             my $col_matching_terminal = 0;
1743             if ( defined($j_terminal_match) ) {
1744
1745                 # remember the column of the terminal ? or { to match with
1746                 $col_matching_terminal =
1747                   $base_line->get_column($j_terminal_match);
1748
1749                 # Ignore an undefined value as a defensive step; shouldn't
1750                 # normally happen.
1751                 $col_matching_terminal = 0
1752                   unless defined($col_matching_terminal);
1753             }
1754
1755             # -------------------------------------------------------------
1756             # Allow hanging side comment to join current group, if any.  The
1757             # only advantage is to keep the other tokens in the same group. For
1758             # example, this would make the '=' align here:
1759             #  $ax         = 1;           # side comment
1760             #                             # hanging side comment
1761             #  $boondoggle = 5;           # side comment
1762             #  $beetle     = 5;           # side comment
1763
1764             # here is another example..
1765
1766             #  _rtoc_name_count   => {},                   # hash to track ..
1767             #  _rpackage_stack    => [],                   # stack to check ..
1768             #                                              # name changes
1769             #  _rlast_level       => \$last_level,         # brace indentation
1770             #
1771             #
1772             # If this were not desired, the next step could be skipped.
1773             # -------------------------------------------------------------
1774             if ( $new_line->get_is_hanging_side_comment() ) {
1775                 join_hanging_comment( $new_line, $base_line );
1776             }
1777
1778             # If this line has no matching tokens, then flush out the lines
1779             # BEFORE this line unless both it and the previous line have side
1780             # comments.  This prevents this line from pushing side coments out
1781             # to the right.
1782             elsif ( $new_line->get_jmax() == 1 ) {
1783
1784                 # There are no matching tokens, so now check side comments.
1785                 # Programming note: accessing arrays with index -1 is
1786                 # risky in Perl, but we have verified there is at least one
1787                 # line in the group and that there is at least one field.
1788                 my $prev_comment =
1789                   $rall_lines->[ $jline - 1 ]->get_rfields()->[-1];
1790                 my $side_comment = $new_line->get_rfields()->[-1];
1791                 end_rgroup(-1) unless ( $side_comment && $prev_comment );
1792             }
1793
1794             # See if the new line matches and fits the current group,
1795             # if it still exists. Flush the current group if not.
1796             my $match_code;
1797             if ($group_line_count) {
1798                 ( $match_code, my $imax_align ) =
1799                   $self->check_match( $new_line, $base_line,
1800                     $rall_lines->[ $jline - 1 ] );
1801                 if ( $match_code != 2 ) { end_rgroup($imax_align) }
1802             }
1803
1804             # Store the new line
1805             add_to_rgroup($jline);
1806
1807             if ( defined($j_terminal_match) ) {
1808
1809                 # Decide if we should fix a terminal match. We can either:
1810                 # 1. fix it and prevent the sweep_lr from changing it, or
1811                 # 2. leave it alone and let sweep_lr try to fix it.
1812
1813                 # The current logic is to fix it if:
1814                 # -it has not joined to previous lines,
1815                 # -and either the previous subgroup has just 1 line, or
1816                 # -this line matched but did not fit (so sweep won't work)
1817                 my $fixit;
1818                 if ( $group_line_count == 1 ) {
1819                     $fixit ||= $match_code;
1820                     if ( !$fixit ) {
1821                         if ( @{$rgroups} > 1 ) {
1822                             my ( $jbegx, $jendx ) = @{ $rgroups->[-2] };
1823                             my $nlines = $jendx - $jbegx + 1;
1824                             $fixit ||= $nlines <= 1;
1825                         }
1826                     }
1827                 }
1828
1829                 if ($fixit) {
1830                     $base_line = $new_line;
1831                     my $col_now = $base_line->get_column($j_terminal_match);
1832
1833                     # Ignore an undefined value as a defensive step; shouldn't
1834                     # normally happen.
1835                     $col_now = 0 unless defined($col_now);
1836
1837                     my $pad = $col_matching_terminal - $col_now;
1838                     my $padding_available =
1839                       $base_line->get_available_space_on_right();
1840                     if ( $col_now && $pad > 0 && $pad <= $padding_available ) {
1841                         $base_line->increase_field_width( $j_terminal_match,
1842                             $pad );
1843                     }
1844
1845                     # do not let sweep_left_to_right change an isolated 'else'
1846                     if ( !$new_line->get_is_terminal_ternary() ) {
1847                         block_penultimate_match();
1848                     }
1849                 }
1850                 end_rgroup(-1);
1851             }
1852
1853             # end the group if we know we cannot match next line.
1854             elsif ( $new_line->get_end_group() ) {
1855                 end_rgroup(-1);
1856             }
1857         } ## end loop over lines
1858
1859         end_rgroup(-1);
1860         return ($rgroups);
1861     }
1862 }
1863
1864 sub two_line_pad {
1865
1866     my ( $line_m, $line, $imax_min ) = @_;
1867
1868     # Given:
1869     #  two isolated (list) lines
1870     #  imax_min = number of common alignment tokens
1871     # Return:
1872     #  $pad_max = maximum suggested pad distnce
1873     #           = 0 if alignment not recommended
1874     # Note that this is only for two lines which do not have alignment tokens
1875     # in common with any other lines.  It is intended for lists, but it might
1876     # also be used for two non-list lines with a common leading '='.
1877
1878     # Allow alignment if the difference in the two unpadded line lengths
1879     # is not more than either line length.  The idea is to avoid
1880     # aligning lines with very different field lengths, like these two:
1881
1882     #   [
1883     #       'VARCHAR', DBI::SQL_VARCHAR, undef, "'", "'", undef, 0, 1,
1884     #       1, 0, 0, 0, undef, 0, 0
1885     #   ];
1886     my $rfield_lengths   = $line->get_rfield_lengths();
1887     my $rfield_lengths_m = $line_m->get_rfield_lengths();
1888
1889     # Safety check - shouldn't happen
1890     return 0
1891       unless $imax_min < @{$rfield_lengths} && $imax_min < @{$rfield_lengths_m};
1892
1893     my $lensum_m = 0;
1894     my $lensum   = 0;
1895     for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
1896         $lensum_m += $rfield_lengths_m->[$i];
1897         $lensum   += $rfield_lengths->[$i];
1898     }
1899
1900     my ( $lenmin, $lenmax ) =
1901       $lensum >= $lensum_m ? ( $lensum_m, $lensum ) : ( $lensum, $lensum_m );
1902
1903     my $patterns_match;
1904     if ( $line_m->get_list_type() && $line->get_list_type() ) {
1905         $patterns_match = 1;
1906         my $rpatterns_m = $line_m->get_rpatterns();
1907         my $rpatterns   = $line->get_rpatterns();
1908         for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
1909             my $pat   = $rpatterns->[$i];
1910             my $pat_m = $rpatterns_m->[$i];
1911             if ( $pat ne $pat_m ) { $patterns_match = 0; last }
1912         }
1913     }
1914
1915     my $pad_max = $lenmax;
1916     if ( !$patterns_match && $lenmax > 2 * $lenmin ) { $pad_max = 0 }
1917
1918     return $pad_max;
1919 }
1920
1921 sub sweep_left_to_right {
1922
1923     my ( $rlines, $rgroups, $group_level ) = @_;
1924
1925     # So far we have divided the lines into groups having an equal number of
1926     # identical alignments.  Here we are going to look for common leading
1927     # alignments between the different groups and align them when possible.
1928     # For example, the three lines below are in three groups because each line
1929     # has a different number of commas.  In this routine we will sweep from
1930     # left to right, aligning the leading commas as we go, but stopping if we
1931     # hit the line length limit.
1932
1933     #  my ( $num, $numi, $numj,  $xyza, $ka,   $xyzb, $kb, $aff, $error );
1934     #  my ( $i,   $j,    $error, $aff,  $asum, $avec );
1935     #  my ( $km,  $area, $varea );
1936
1937     # nothing to do if just one group
1938     my $ng_max = @{$rgroups} - 1;
1939     return unless ( $ng_max > 0 );
1940
1941     ############################################################################
1942     # Step 1: Loop over groups to find all common leading alignment tokens
1943     ############################################################################
1944
1945     my $line;
1946     my $rtokens;
1947     my $imax;     # index of maximum non-side-comment alignment token
1948     my $istop;    # an optional stopping index
1949     my $jbeg;     # starting line index
1950     my $jend;     # ending line index
1951
1952     my $line_m;
1953     my $rtokens_m;
1954     my $imax_m;
1955     my $istop_m;
1956     my $jbeg_m;
1957     my $jend_m;
1958
1959     my $istop_mm;
1960
1961     # Look at neighboring pairs of groups and form a simple list
1962     # of all common leading alignment tokens. Foreach such match we
1963     # store [$i, $ng], where
1964     #  $i = index of the token in the line (0,1,...)
1965     #  $ng is the second of the two groups with this common token
1966     my @icommon;
1967
1968     # Hash to hold the maximum alignment change for any group
1969     my %max_move;
1970
1971     # a small number of columns
1972     my $short_pad = 4;
1973
1974     my $ng = -1;
1975     foreach my $item ( @{$rgroups} ) {
1976         $ng++;
1977
1978         $istop_mm = $istop_m;
1979
1980         # save _m values of previous group
1981         $line_m    = $line;
1982         $rtokens_m = $rtokens;
1983         $imax_m    = $imax;
1984         $istop_m   = $istop;
1985         $jbeg_m    = $jbeg;
1986         $jend_m    = $jend;
1987
1988         # Get values for this group. Note that we just have to use values for
1989         # one of the lines of the group since all members have the same
1990         # alignments.
1991         ( $jbeg, $jend, $istop ) = @{$item};
1992
1993         $line    = $rlines->[$jbeg];
1994         $rtokens = $line->get_rtokens();
1995         $imax    = $line->get_jmax() - 2;
1996         $istop   = -1 unless ( defined($istop) );
1997         $istop   = $imax if ( $istop > $imax );
1998
1999         # Initialize on first group
2000         next if ( $ng == 0 );
2001
2002         # Use the minimum index limit of the two groups
2003         my $imax_min = $imax > $imax_m ? $imax_m : $imax;
2004
2005         # Also impose a limit if given.
2006         if ( $istop_m < $imax_min ) {
2007             $imax_min = $istop_m;
2008         }
2009
2010         # Special treatment of two one-line groups isolated from other lines,
2011         # unless they form a simple list or a terminal match.  Otherwise the
2012         # alignment can look strange in some cases.
2013         my $list_type = $rlines->[$jbeg]->get_list_type();
2014         if (
2015                $jend == $jbeg
2016             && $jend_m == $jbeg_m
2017             && ( $ng == 1 || $istop_mm < 0 )
2018             && ( $ng == $ng_max || $istop < 0 )
2019             && !$line->get_j_terminal_match()
2020
2021             # Only do this for imperfect matches. This is normally true except
2022             # when two perfect matches cannot form a group because the line
2023             # length limit would be exceeded. In that case we can still try
2024             # to match as many alignments as possible.
2025             && ( $imax != $imax_m || $istop_m != $imax_m )
2026           )
2027         {
2028
2029             # We will just align assignments and simple lists
2030             next unless ( $imax_min >= 0 );
2031             next
2032               unless ( $rtokens->[0] =~ /^=\d/
2033                 || $list_type );
2034
2035             # In this case we will limit padding to a short distance.  This
2036             # is a compromise to keep some vertical alignment but prevent large
2037             # gaps, which do not look good for just two lines.
2038             my $pad_max =
2039               two_line_pad( $rlines->[$jbeg], $rlines->[$jbeg_m], $imax_min );
2040             next unless ($pad_max);
2041             my $ng_m = $ng - 1;
2042             $max_move{"$ng_m"} = $pad_max;
2043             $max_move{"$ng"}   = $pad_max;
2044         }
2045
2046         # Loop to find all common leading tokens.
2047         if ( $imax_min >= 0 ) {
2048             foreach my $i ( 0 .. $imax_min ) {
2049                 my $tok   = $rtokens->[$i];
2050                 my $tok_m = $rtokens_m->[$i];
2051                 last if ( $tok ne $tok_m );
2052                 push @icommon, [ $i, $ng, $tok ];
2053             }
2054         }
2055     }
2056     return unless @icommon;
2057
2058     ###########################################################
2059     # Step 2: Reorder and consolidate the list into a task list
2060     ###########################################################
2061
2062     # We have to work first from lowest token index to highest, then by group,
2063     # sort our list first on token index then group number
2064     @icommon = sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @icommon;
2065
2066     # Make a task list of the form
2067     #   [$i, ng_beg, $ng_end, $tok], ..
2068     # where
2069     #   $i is the index of the token to be aligned
2070     #   $ng_beg..$ng_end is the group range for this action
2071     my @todo;
2072     my ( $i, $ng_end, $tok );
2073     foreach my $item (@icommon) {
2074         my $ng_last = $ng_end;
2075         my $i_last  = $i;
2076         ( $i, $ng_end, $tok ) = @{$item};
2077         my $ng_beg = $ng_end - 1;
2078         if ( defined($ng_last) && $ng_beg == $ng_last && $i == $i_last ) {
2079             my $var = pop(@todo);
2080             $ng_beg = $var->[1];
2081         }
2082         my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
2083         push @todo, [ $i, $ng_beg, $ng_end, $raw_tok, $lev ];
2084     }
2085
2086     ###############################
2087     # Step 3: Execute the task list
2088     ###############################
2089     do_left_to_right_sweep( $rlines, $rgroups, \@todo, \%max_move, $short_pad,
2090         $group_level );
2091     return;
2092 }
2093
2094 {    ## closure for sub do_left_to_right_sweep
2095
2096     my %is_good_alignment_token;
2097
2098     BEGIN {
2099
2100         # One of the most difficult aspects of vertical alignment is knowing
2101         # when not to align.  Alignment can go from looking very nice to very
2102         # bad when overdone.  In the sweep algorithm there are two special
2103         # cases where we may need to limit padding to a '$short_pad' distance
2104         # to avoid some very ugly formatting:
2105
2106         # 1. Two isolated lines with partial alignment
2107         # 2. A 'tail-wag-dog' situation, in which a single terminal
2108         #    line with partial alignment could cause a significant pad
2109         #    increase in many previous lines if allowed to join the alignment.
2110
2111         # For most alignment tokens, we will allow only a small pad to be
2112         # introduced (the hardwired $short_pad variable) . But for some 'good'
2113         # alignments we can be less restrictive.
2114
2115         # These are 'good' alignments, which are allowed more padding:
2116         my @q = qw(
2117           => = ? if unless or || {
2118         );
2119         push @q, ',';
2120         @is_good_alignment_token{@q} = (0) x scalar(@q);
2121
2122         # Promote a few of these to 'best', with essentially no pad limit:
2123         $is_good_alignment_token{'='}      = 1;
2124         $is_good_alignment_token{'if'}     = 1;
2125         $is_good_alignment_token{'unless'} = 1;
2126         $is_good_alignment_token{'=>'}     = 1
2127
2128           # Note the hash values are set so that:
2129           #         if ($is_good_alignment_token{$raw_tok}) => best
2130           # if defined ($is_good_alignment_token{$raw_tok}) => good or best
2131
2132     }
2133
2134     sub do_left_to_right_sweep {
2135         my ( $rlines, $rgroups, $rtodo, $rmax_move, $short_pad, $group_level )
2136           = @_;
2137
2138         # $blocking_level[$nj is the level at a match failure between groups
2139         # $ng-1 and $ng
2140         my @blocking_level;
2141         my $group_list_type = $rlines->[0]->get_list_type();
2142
2143         my $move_to_common_column = sub {
2144
2145             # Move the alignment column of token $itok to $col_want for a
2146             # sequence of groups.
2147             my ( $ngb, $nge, $itok, $col_want, $raw_tok ) = @_;
2148             return unless ( defined($ngb) && $nge > $ngb );
2149             foreach my $ng ( $ngb .. $nge ) {
2150
2151                 my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
2152                 my $line  = $rlines->[$jbeg];
2153                 my $col   = $line->get_column($itok);
2154                 my $avail = $line->get_available_space_on_right();
2155                 my $move  = $col_want - $col;
2156                 if ( $move > 0 ) {
2157
2158                     # limit padding increase in isolated two lines
2159                     next
2160                       if ( defined( $rmax_move->{$ng} )
2161                         && $move > $rmax_move->{$ng}
2162                         && !$is_good_alignment_token{$raw_tok} );
2163
2164                     $line->increase_field_width( $itok, $move );
2165                 }
2166                 elsif ( $move < 0 ) {
2167
2168                     # spot to take special action on failure to move
2169                 }
2170             }
2171             return;
2172         };
2173
2174         foreach my $task ( @{$rtodo} ) {
2175             my ( $itok, $ng_beg, $ng_end, $raw_tok, $lev ) = @{$task};
2176
2177             # Nothing to do for a single group
2178             next unless ( $ng_end > $ng_beg );
2179
2180             my $ng_first;  # index of the first group of a continuous sequence
2181             my $col_want;  # the common alignment column of a sequence of groups
2182             my $col_limit; # maximum column before bumping into max line length
2183             my $line_count_ng_m = 0;
2184             my $jmax_m;
2185             my $it_stop_m;
2186
2187             # Loop over the groups
2188             # 'ix_' = index in the array of lines
2189             # 'ng_' = index in the array of groups
2190             # 'it_' = index in the array of tokens
2191             my $ix_min      = $rgroups->[$ng_beg]->[0];
2192             my $ix_max      = $rgroups->[$ng_end]->[1];
2193             my $lines_total = $ix_max - $ix_min + 1;
2194             foreach my $ng ( $ng_beg .. $ng_end ) {
2195                 my ( $ix_beg, $ix_end, $it_stop ) = @{ $rgroups->[$ng] };
2196                 my $line_count_ng = $ix_end - $ix_beg + 1;
2197
2198                 # Important: note that since all lines in a group have a common
2199                 # alignments object, we just have to work on one of the lines
2200                 # (the first line).  All of the rest will be changed
2201                 # automatically.
2202                 my $line = $rlines->[$ix_beg];
2203                 my $jmax = $line->get_jmax();
2204
2205                 # the maximum space without exceeding the line length:
2206                 my $avail   = $line->get_available_space_on_right();
2207                 my $col     = $line->get_column($itok);
2208                 my $col_max = $col + $avail;
2209
2210                 # Initialize on first group
2211                 if ( !defined($col_want) ) {
2212                     $ng_first        = $ng;
2213                     $col_want        = $col;
2214                     $col_limit       = $col_max;
2215                     $line_count_ng_m = $line_count_ng;
2216                     $jmax_m          = $jmax;
2217                     $it_stop_m       = $it_stop;
2218                     next;
2219                 }
2220
2221                 # RULE: Throw a blocking flag upon encountering a token level
2222                 # different from the level of the first blocking token.  For
2223                 # example, in the following example, if the = matches get
2224                 # blocked between two groups as shown, then we want to start
2225                 # blocking matches at the commas, which are at deeper level, so
2226                 # that we do not get the big gaps shown here:
2227
2228                 #  my $unknown3 = pack( "v",          -2 );
2229                 #  my $unknown4 = pack( "v",          0x09 );
2230                 #  my $unknown5 = pack( "VVV",        0x06, 0x00, 0x00 );
2231                 #  my $num_bbd_blocks  = pack( "V",   $num_lists );
2232                 #  my $root_startblock = pack( "V",   $root_start );
2233                 #  my $unknown6        = pack( "VV",  0x00, 0x1000 );
2234
2235                 # On the other hand, it is okay to keep matching at the same
2236                 # level such as in a simple list of commas and/or fat arrors.
2237
2238                 my $is_blocked = defined( $blocking_level[$ng] )
2239                   && $lev > $blocking_level[$ng];
2240
2241                 # TAIL-WAG-DOG RULE: prevent a 'tail-wag-dog' syndrom, meaning:
2242                 # Do not let one or two lines with a **different number of
2243                 # alignments** open up a big gap in a large block.  For
2244                 # example, we will prevent something like this, where the first
2245                 # line prys open the rest:
2246
2247             #  $worksheet->write( "B7", "http://www.perl.com", undef, $format );
2248             #  $worksheet->write( "C7", "",                    $format );
2249             #  $worksheet->write( "D7", "",                    $format );
2250             #  $worksheet->write( "D8", "",                    $format );
2251             #  $worksheet->write( "D8", "",                    $format );
2252
2253                 # We should exclude from consideration two groups which are
2254                 # effectively the same but separated because one does not
2255                 # fit in the maximum allowed line length.
2256                 my $is_same_group =
2257                   $jmax == $jmax_m && $it_stop_m == $jmax_m - 2;
2258
2259                 my $lines_above = $ix_beg - $ix_min;
2260                 my $lines_below = $lines_total - $lines_above;
2261
2262                 # Increase the tolerable gap for certain favorable factors
2263                 my $factor    = 1;
2264                 my $top_level = $lev == $group_level;
2265
2266                 # Align best top level alignment tokens like '=', 'if', ...
2267                 # A factor of 10 allows a gap of up to 40 spaces
2268                 if ( $top_level && $is_good_alignment_token{$raw_tok} ) {
2269                     $factor = 10;
2270                 }
2271
2272                 # Otherwise allow some minimal padding of good alignments
2273                 elsif (
2274
2275                     defined( $is_good_alignment_token{$raw_tok} )
2276
2277                     # We have to be careful if there are just 2 lines.  This
2278                     # two-line factor allows large gaps only for 2 lines which
2279                     # are simple lists with fewer items on the second line. It
2280                     # gives results similar to previous versions of perltidy.
2281                     && (   $lines_total > 2
2282                         || $group_list_type && $jmax < $jmax_m && $top_level )
2283                   )
2284                 {
2285                     $factor += 1;
2286                     if ($top_level) {
2287                         $factor += 1;
2288                     }
2289                 }
2290
2291                 my $is_big_gap;
2292                 if ( !$is_same_group ) {
2293                     $is_big_gap ||=
2294                       (      $lines_above == 1
2295                           || $lines_above == 2 && $lines_below >= 4 )
2296                       && $col_want > $col + $short_pad * $factor;
2297                     $is_big_gap ||=
2298                       (      $lines_below == 1
2299                           || $lines_below == 2 && $lines_above >= 4 )
2300                       && $col > $col_want + $short_pad * $factor;
2301                 }
2302
2303                 # if match is limited by gap size, stop aligning at this level
2304                 if ($is_big_gap) {
2305                     $blocking_level[$ng] = $lev - 1;
2306                 }
2307
2308                 # quit and restart if it cannot join this batch
2309                 if (   $col_want > $col_max
2310                     || $col > $col_limit
2311                     || $is_big_gap
2312                     || $is_blocked )
2313                 {
2314
2315                     # remember the level of the first blocking token
2316                     if ( !defined( $blocking_level[$ng] ) ) {
2317                         $blocking_level[$ng] = $lev;
2318                     }
2319
2320                     $move_to_common_column->(
2321                         $ng_first, $ng - 1, $itok, $col_want, $raw_tok
2322                     );
2323                     $ng_first        = $ng;
2324                     $col_want        = $col;
2325                     $col_limit       = $col_max;
2326                     $line_count_ng_m = $line_count_ng;
2327                     $jmax_m          = $jmax;
2328                     $it_stop_m       = $it_stop;
2329                     next;
2330                 }
2331
2332                 $line_count_ng_m += $line_count_ng;
2333
2334                 # update the common column and limit
2335                 if ( $col > $col_want )      { $col_want  = $col }
2336                 if ( $col_max < $col_limit ) { $col_limit = $col_max }
2337
2338             } ## end loop over groups
2339
2340             if ( $ng_end > $ng_first ) {
2341                 $move_to_common_column->(
2342                     $ng_first, $ng_end, $itok, $col_want, $raw_tok
2343                 );
2344             } ## end loop over groups for one task
2345         } ## end loop over tasks
2346
2347         return;
2348     }
2349 }
2350
2351 sub delete_selected_tokens {
2352
2353     my ( $line_obj, $ridel ) = @_;
2354
2355     # $line_obj    is the line to be modified
2356     # $ridel       is a ref to list of indexes to be deleted
2357
2358     # remove an unused alignment token(s) to improve alignment chances
2359
2360     return unless ( defined($line_obj) && defined($ridel) && @{$ridel} );
2361
2362     my $jmax_old           = $line_obj->get_jmax();
2363     my $rfields_old        = $line_obj->get_rfields();
2364     my $rfield_lengths_old = $line_obj->get_rfield_lengths();
2365     my $rpatterns_old      = $line_obj->get_rpatterns();
2366     my $rtokens_old        = $line_obj->get_rtokens();
2367     my $j_terminal_match   = $line_obj->get_j_terminal_match();
2368
2369     use constant EXPLAIN_DELETE_SELECTED => 0;
2370
2371     local $" = '> <';
2372     EXPLAIN_DELETE_SELECTED && print <<EOM;
2373 delete indexes: <@{$ridel}>
2374 old jmax: $jmax_old
2375 old tokens: <@{$rtokens_old}>
2376 old patterns: <@{$rpatterns_old}>
2377 old fields: <@{$rfields_old}>
2378 old field_lengths: <@{$rfield_lengths_old}>
2379 EOM
2380
2381     my $rfields_new        = [];
2382     my $rpatterns_new      = [];
2383     my $rtokens_new        = [];
2384     my $rfield_lengths_new = [];
2385
2386     # Convert deletion list to a hash to allow any order, multiple entries,
2387     # and avoid problems with index values out of range
2388     my %delete_me;
2389     @delete_me{ @{$ridel} } = (1) x scalar( @{$ridel} );
2390
2391     my $pattern      = $rpatterns_old->[0];
2392     my $field        = $rfields_old->[0];
2393     my $field_length = $rfield_lengths_old->[0];
2394     push @{$rfields_new},        $field;
2395     push @{$rfield_lengths_new}, $field_length;
2396     push @{$rpatterns_new},      $pattern;
2397
2398     # Loop to either copy items or concatenate fields and patterns
2399     my $jmin_del;
2400     for ( my $j = 0 ; $j < $jmax_old ; $j++ ) {
2401         my $token        = $rtokens_old->[$j];
2402         my $field        = $rfields_old->[ $j + 1 ];
2403         my $field_length = $rfield_lengths_old->[ $j + 1 ];
2404         my $pattern      = $rpatterns_old->[ $j + 1 ];
2405         if ( !$delete_me{$j} ) {
2406             push @{$rtokens_new},        $token;
2407             push @{$rfields_new},        $field;
2408             push @{$rpatterns_new},      $pattern;
2409             push @{$rfield_lengths_new}, $field_length;
2410         }
2411         else {
2412             if ( !defined($jmin_del) ) { $jmin_del = $j }
2413             $rfields_new->[-1] .= $field;
2414             $rfield_lengths_new->[-1] += $field_length;
2415             $rpatterns_new->[-1] .= $pattern;
2416         }
2417     }
2418
2419     # ----- x ------ x ------ x ------
2420     #t      0        1        2        <- token indexing
2421     #f   0      1        2        3    <- field and pattern
2422
2423     my $jmax_new = @{$rfields_new} - 1;
2424     $line_obj->set_rtokens($rtokens_new);
2425     $line_obj->set_rpatterns($rpatterns_new);
2426     $line_obj->set_rfields($rfields_new);
2427     $line_obj->set_rfield_lengths($rfield_lengths_new);
2428     $line_obj->set_jmax($jmax_new);
2429
2430     # The value of j_terminal_match will be incorrect if we delete tokens prior
2431     # to it. We will have to give up on aligning the terminal tokens if this
2432     # happens.
2433     if ( defined($j_terminal_match) && $jmin_del <= $j_terminal_match ) {
2434         $line_obj->set_j_terminal_match(undef);
2435     }
2436
2437     # update list type -
2438     if ( $line_obj->get_list_seqno() ) {
2439
2440         ## This works, but for efficiency see if we need to make a change:
2441         ## decide_if_list($line_obj);
2442
2443         # An existing list will still be a list but with possibly different
2444         # leading token
2445         my $old_list_type = $line_obj->get_list_type();
2446         my $new_list_type = "";
2447         if ( $rtokens_new->[0] =~ /^(=>|,)/ ) {
2448             $new_list_type = $rtokens_new->[0];
2449         }
2450         if ( !$old_list_type || $old_list_type ne $new_list_type ) {
2451             decide_if_list($line_obj);
2452         }
2453     }
2454
2455     EXPLAIN_DELETE_SELECTED && print <<EOM;
2456
2457 new jmax: $jmax_new
2458 new tokens: <@{$rtokens_new}>
2459 new patterns: <@{$rpatterns_new}>
2460 new fields: <@{$rfields_new}>
2461 EOM
2462     return;
2463 }
2464
2465 {    ## closure for sub decode_alignment_token
2466
2467     # This routine is called repeatedly for each token, so it needs to be
2468     # efficient.  We can speed things up by remembering the inputs and outputs
2469     # in a hash.
2470     my %decoded_token;
2471
2472     sub initialize_decode {
2473
2474         # We will re-initialize the hash for each file. Otherwise, there is
2475         # a danger that the hash can become arbitrarily large if a very large
2476         # number of files is processed at once.
2477         %decoded_token = ();
2478         return;
2479     }
2480
2481     sub decode_alignment_token {
2482
2483         # Unpack the values packed in an alignment token
2484         #
2485         # Usage:
2486         #        my ( $raw_tok, $lev, $tag, $tok_count ) =
2487         #          decode_alignment_token($token);
2488
2489         # Alignment tokens have a trailing decimal level and optional tag (for
2490         # commas):
2491         # For example, the first comma in the following line
2492         #     sub banner  { crlf; report( shift, '/', shift ); crlf }
2493         # is decorated as follows:
2494         #    ,2+report-6  => (tok,lev,tag) =qw( ,   2   +report-6)
2495
2496         # An optional token count may be appended with a leading dot.
2497         # Currently this is only done for '=' tokens but this could change.
2498         # For example, consider the following line:
2499         #   $nport   = $port = shift || $name;
2500         # The first '=' may either be '=0' or '=0.1' [level 0, first equals]
2501         # The second '=' will be '=0.2' [level 0, second equals]
2502         my ($tok) = @_;
2503
2504         if ( defined( $decoded_token{$tok} ) ) {
2505             return @{ $decoded_token{$tok} };
2506         }
2507
2508         my ( $raw_tok, $lev, $tag, $tok_count ) = ( $tok, 0, "", 1 );
2509         if ( $tok =~ /^(\D+)(\d+)([^\.]*)(\.(\d+))?$/ ) {
2510             $raw_tok   = $1;
2511             $lev       = $2;
2512             $tag       = $3 if ($3);
2513             $tok_count = $5 if ($5);
2514         }
2515         my @vals = ( $raw_tok, $lev, $tag, $tok_count );
2516         $decoded_token{$tok} = \@vals;
2517         return @vals;
2518     }
2519 }
2520
2521 {    ## closure for sub delete_unmatched_tokens
2522
2523     my %is_assignment;
2524     my %keep_after_deleted_assignment;
2525
2526     BEGIN {
2527         my @q;
2528
2529         @q = qw(
2530           = **= += *= &= <<= &&=
2531           -= /= |= >>= ||= //=
2532           .= %= ^=
2533           x=
2534         );
2535         @is_assignment{@q} = (1) x scalar(@q);
2536
2537         # These tokens may be kept following an = deletion
2538         @q = qw(
2539           if unless or ||
2540         );
2541         @keep_after_deleted_assignment{@q} = (1) x scalar(@q);
2542
2543     }
2544
2545     # This flag is for testing only and should normally be zero.
2546     use constant TEST_DELETE_NULL => 0;
2547
2548     sub delete_unmatched_tokens {
2549         my ( $rlines, $group_level ) = @_;
2550
2551         # This is a preliminary step in vertical alignment in which we remove
2552         # as many obviously un-needed alignment tokens as possible.  This will
2553         # prevent them from interfering with the final alignment.
2554
2555         # These are the return values
2556         my $max_lev_diff     = 0;    # used to avoid a call to prune_tree
2557         my $saw_side_comment = 0;    # used to avoid a call for side comments
2558
2559         # Handle no lines -- shouldn't happen
2560         return unless @{$rlines};
2561
2562         # Handle a single line
2563         if ( @{$rlines} == 1 ) {
2564             my $line   = $rlines->[0];
2565             my $jmax   = $line->get_jmax();
2566             my $length = $line->get_rfield_lengths()->[$jmax];
2567             $saw_side_comment = $length > 0;
2568             return ( $max_lev_diff, $saw_side_comment );
2569         }
2570
2571         my $has_terminal_match = $rlines->[-1]->get_j_terminal_match();
2572
2573         # ignore hanging side comments in these operations
2574         my @filtered   = grep { !$_->get_is_hanging_side_comment() } @{$rlines};
2575         my $rnew_lines = \@filtered;
2576
2577         $saw_side_comment = @filtered != @{$rlines};
2578         $max_lev_diff     = 0;
2579
2580         # nothing to do if all lines were hanging side comments
2581         my $jmax = @{$rnew_lines} - 1;
2582         return ( $max_lev_diff, $saw_side_comment ) unless ( $jmax >= 0 );
2583
2584         my @equals_info;
2585         my @line_info;
2586         my %is_good_tok;
2587
2588         # create a hash of tokens for each line
2589         my $rline_hashes = [];
2590         foreach my $line ( @{$rnew_lines} ) {
2591             my $rhash     = {};
2592             my $rtokens   = $line->get_rtokens();
2593             my $rpatterns = $line->get_rpatterns();
2594             my $i         = 0;
2595             my ( $i_eq, $tok_eq, $pat_eq );
2596             my ( $lev_min, $lev_max );
2597             foreach my $tok ( @{$rtokens} ) {
2598                 my ( $raw_tok, $lev, $tag, $tok_count ) =
2599                   decode_alignment_token($tok);
2600
2601                 if ( $tok ne '#' ) {
2602                     if ( !defined($lev_min) ) {
2603                         $lev_min = $lev;
2604                         $lev_max = $lev;
2605                     }
2606                     else {
2607                         if ( $lev < $lev_min ) { $lev_min = $lev }
2608                         if ( $lev > $lev_max ) { $lev_max = $lev }
2609                     }
2610                 }
2611                 else {
2612                     if ( !$saw_side_comment ) {
2613                         my $length = $line->get_rfield_lengths()->[ $i + 1 ];
2614                         $saw_side_comment ||= $length;
2615                     }
2616                 }
2617
2618                 # Possible future upgrade: for multiple matches,
2619                 # record [$i1, $i2, ..] instead of $i
2620                 $rhash->{$tok} =
2621                   [ $i, undef, undef, $raw_tok, $lev, $tag, $tok_count ];
2622
2623                 # remember the first equals at line level
2624                 if ( !defined($i_eq) && $raw_tok eq '=' ) {
2625
2626                     if ( $lev eq $group_level ) {
2627                         $i_eq   = $i;
2628                         $tok_eq = $tok;
2629                         $pat_eq = $rpatterns->[$i];
2630                     }
2631                 }
2632                 $i++;
2633             }
2634             push @{$rline_hashes}, $rhash;
2635             push @equals_info,     [ $i_eq,    $tok_eq, $pat_eq ];
2636             push @line_info,       [ $lev_min, $lev_max ];
2637             if ( defined($lev_min) ) {
2638                 my $lev_diff = $lev_max - $lev_min;
2639                 if ( $lev_diff > $max_lev_diff ) { $max_lev_diff = $lev_diff }
2640             }
2641         }
2642
2643         # compare each line pair and record matches
2644         my $rtok_hash = {};
2645         my $nr        = 0;
2646         for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) {
2647             my $nl = $nr;
2648             $nr = 0;
2649             my $jr      = $jl + 1;
2650             my $rhash_l = $rline_hashes->[$jl];
2651             my $rhash_r = $rline_hashes->[$jr];
2652             my $count   = 0;                      # UNUSED NOW?
2653             my $ntoks   = 0;
2654             foreach my $tok ( keys %{$rhash_l} ) {
2655                 $ntoks++;
2656                 if ( defined( $rhash_r->{$tok} ) ) {
2657                     if ( $tok ne '#' ) { $count++; }
2658                     my $il = $rhash_l->{$tok}->[0];
2659                     my $ir = $rhash_r->{$tok}->[0];
2660                     $rhash_l->{$tok}->[2] = $ir;
2661                     $rhash_r->{$tok}->[1] = $il;
2662                     if ( $tok ne '#' ) {
2663                         push @{ $rtok_hash->{$tok} }, ( $jl, $jr );
2664                         $nr++;
2665                     }
2666                 }
2667             }
2668
2669             # Set a line break if no matching tokens between these lines
2670             # (this is not strictly necessary now but does not hurt)
2671             if ( $nr == 0 && $nl > 0 ) {
2672                 $rnew_lines->[$jl]->set_end_group(1);
2673             }
2674
2675             # Also set a line break if both lines have simple equals but with
2676             # different leading characters in patterns.  This check is similar
2677             # to one in sub check_match, and will prevent sub
2678             # prune_alignment_tree from removing alignments which otherwise
2679             # should be kept. This fix is rarely needed, but it can
2680             # occasionally improve formatting.
2681             # For example:
2682             #     my $name = $this->{Name};
2683             #     $type = $this->ctype($genlooptype) if defined $genlooptype;
2684             #     my $declini = ( $asgnonly ? ""          : "\t$type *" );
2685             #     my $cast    = ( $type     ? "($type *)" : "" );
2686             # The last two lines start with 'my' and will not match the
2687             # previous line starting with $type, so we do not want
2688             # prune_alignment tree to delete their ? : alignments at a deeper
2689             # level.
2690             my ( $i_eq_l, $tok_eq_l, $pat_eq_l ) = @{ $equals_info[$jl] };
2691             my ( $i_eq_r, $tok_eq_r, $pat_eq_r ) = @{ $equals_info[$jr] };
2692             if ( defined($i_eq_l) && defined($i_eq_r) ) {
2693
2694                 # Also, do not align equals across a change in ci level
2695                 my $ci_jump = $rnew_lines->[$jl]->get_ci_level() !=
2696                   $rnew_lines->[$jr]->get_ci_level();
2697
2698                 if (
2699                        $tok_eq_l eq $tok_eq_r
2700                     && $i_eq_l == 0
2701                     && $i_eq_r == 0
2702                     && ( substr( $pat_eq_l, 0, 1 ) ne substr( $pat_eq_r, 0, 1 )
2703                         || $ci_jump )
2704                   )
2705                 {
2706                     $rnew_lines->[$jl]->set_end_group(1);
2707                 }
2708             }
2709         }
2710
2711         # find subgroups
2712         my @subgroups;
2713         push @subgroups, [ 0, $jmax ];
2714         for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) {
2715             if ( $rnew_lines->[$jl]->get_end_group() ) {
2716                 $subgroups[-1]->[1] = $jl;
2717                 push @subgroups, [ $jl + 1, $jmax ];
2718             }
2719         }
2720
2721         # flag to allow skipping pass 2
2722         my $saw_large_group;
2723
2724         ############################################################
2725         # PASS 1 over subgroups to remove unmatched alignment tokens
2726         ############################################################
2727         foreach my $item (@subgroups) {
2728             my ( $jbeg, $jend ) = @{$item};
2729
2730             my $nlines = $jend - $jbeg + 1;
2731
2732             ####################################################
2733             # Look for complete if/elsif/else and ternary blocks
2734             ####################################################
2735
2736             # We are looking for a common '$dividing_token' like these:
2737
2738             #    if    ( $b and $s ) { $p->{'type'} = 'a'; }
2739             #    elsif ($b)          { $p->{'type'} = 'b'; }
2740             #    elsif ($s)          { $p->{'type'} = 's'; }
2741             #    else                { $p->{'type'} = ''; }
2742             #                        ^----------- dividing_token
2743
2744             #   my $severity =
2745             #      !$routine                     ? '[PFX]'
2746             #     : $routine =~ /warn.*_d\z/     ? '[DS]'
2747             #     : $routine =~ /ck_warn/        ? 'W'
2748             #     : $routine =~ /ckWARN\d*reg_d/ ? 'S'
2749             #     : $routine =~ /ckWARN\d*reg/   ? 'W'
2750             #     : $routine =~ /vWARN\d/        ? '[WDS]'
2751             #     :                                '[PFX]';
2752             #                                    ^----------- dividing_token
2753
2754             # Only look for groups which are more than 2 lines long.  Two lines
2755             # can get messed up doing this, probably due to the various
2756             # two-line rules.
2757
2758             my $dividing_token;
2759             my %token_line_count;
2760             if ( $nlines > 2 ) {
2761
2762                 for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
2763                     my %seen;
2764                     my $line    = $rnew_lines->[$jj];
2765                     my $rtokens = $line->get_rtokens();
2766                     foreach my $tok ( @{$rtokens} ) {
2767                         if ( !$seen{$tok} ) {
2768                             $seen{$tok}++;
2769                             $token_line_count{$tok}++;
2770                         }
2771                     }
2772                 }
2773
2774                 foreach my $tok ( keys %token_line_count ) {
2775                     if ( $token_line_count{$tok} == $nlines ) {
2776                         if (   substr( $tok, 0, 1 ) eq '?'
2777                             || substr( $tok, 0, 1 ) eq '{'
2778                             && $tok =~ /^\{\d+if/ )
2779                         {
2780                             $dividing_token = $tok;
2781                             last;
2782                         }
2783                     }
2784                 }
2785             }
2786
2787             #####################################################
2788             # Loop over lines to remove unwanted alignment tokens
2789             #####################################################
2790             for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
2791                 my $line    = $rnew_lines->[$jj];
2792                 my $rtokens = $line->get_rtokens();
2793                 my $rhash   = $rline_hashes->[$jj];
2794                 my $i_eq    = $equals_info[$jj]->[0];
2795                 my @idel;
2796                 my $imax = @{$rtokens} - 2;
2797                 my $delete_above_level;
2798                 my $deleted_assignment_token;
2799
2800                 my $saw_dividing_token = "";
2801                 $saw_large_group ||= $nlines > 2 && $imax > 1;
2802
2803                 # Loop over all alignment tokens
2804                 for ( my $i = 0 ; $i <= $imax ; $i++ ) {
2805                     my $tok = $rtokens->[$i];
2806                     next if ( $tok eq '#' );    # shouldn't happen
2807                     my ( $iii, $il, $ir, $raw_tok, $lev, $tag, $tok_count ) =
2808                       @{ $rhash->{$tok} };
2809
2810                     #######################################################
2811                     # Here is the basic RULE: remove an unmatched alignment
2812                     # which does not occur in the surrounding lines.
2813                     #######################################################
2814                     my $delete_me = !defined($il) && !defined($ir);
2815
2816                     # Apply any user controls. Note that not all lines pass
2817                     # this way so they have to be applied elsewhere too.
2818                     my $align_ok = 1;
2819                     if (%valign_control_hash) {
2820                         $align_ok = $valign_control_hash{$raw_tok};
2821                         $align_ok = $valign_control_default
2822                           unless defined($align_ok);
2823                         $delete_me ||= !$align_ok;
2824                     }
2825
2826                     # But now we modify this with exceptions...
2827
2828                     # EXCEPTION 1: If we are in a complete ternary or
2829                     # if/elsif/else group, and this token is not on every line
2830                     # of the group, should we delete it to preserve overall
2831                     # alignment?
2832                     if ($dividing_token) {
2833                         if ( $token_line_count{$tok} >= $nlines ) {
2834                             $saw_dividing_token ||= $tok eq $dividing_token;
2835                         }
2836                         else {
2837
2838                             # For shorter runs, delete toks to save alignment.
2839                             # For longer runs, keep toks after the '{' or '?'
2840                             # to allow sub-alignments within braces.  The
2841                             # number 5 lines is arbitrary but seems to work ok.
2842                             $delete_me ||=
2843                               ( $nlines < 5 || !$saw_dividing_token );
2844                         }
2845                     }
2846
2847                     # EXCEPTION 2: Remove all tokens above a certain level
2848                     # following a previous deletion.  For example, we have to
2849                     # remove tagged higher level alignment tokens following a
2850                     # '=>' deletion because the tags of higher level tokens
2851                     # will now be incorrect. For example, this will prevent
2852                     # aligning commas as follows after deleting the second '=>'
2853                     #    $w->insert(
2854                     #         ListBox => origin => [ 270, 160 ],
2855                     #         size    => [ 200,           55 ],
2856                     #    );
2857                     if ( defined($delete_above_level) ) {
2858                         if ( $lev > $delete_above_level ) {
2859                             $delete_me ||= 1;    #$tag;
2860                         }
2861                         else { $delete_above_level = undef }
2862                     }
2863
2864                     # EXCEPTION 3: Remove all but certain tokens after an
2865                     # assignment deletion.
2866                     if (
2867                         $deleted_assignment_token
2868                         && ( $lev > $group_level
2869                             || !$keep_after_deleted_assignment{$raw_tok} )
2870                       )
2871                     {
2872                         $delete_me ||= 1;
2873                     }
2874
2875                     # EXCEPTION 4: Do not touch the first line of a 2 line
2876                     # terminal match, such as below, because j_terminal has
2877                     # already been set.
2878                     #    if ($tag) { $tago = "<$tag>"; $tagc = "</$tag>"; }
2879                     #    else      { $tago = $tagc = ''; }
2880                     # But see snippets 'else1.t' and 'else2.t'
2881                     $delete_me = 0
2882                       if ( $jj == $jbeg
2883                         && $has_terminal_match
2884                         && $nlines == 2 );
2885
2886                     # EXCEPTION 5: misc additional rules for commas and equals
2887                     if ($delete_me) {
2888
2889                         # okay to delete second and higher copies of a token
2890                         if ( $tok_count == 1 ) {
2891
2892                             # for a comma...
2893                             if ( $raw_tok eq ',' ) {
2894
2895                                 # Do not delete commas before an equals
2896                                 $delete_me = 0
2897                                   if ( defined($i_eq) && $i < $i_eq );
2898
2899                                 # Do not delete line-level commas
2900                                 $delete_me = 0 if ( $lev <= $group_level );
2901                             }
2902
2903                             # For an assignment at group level..
2904                             if (   $is_assignment{$raw_tok}
2905                                 && $lev == $group_level )
2906                             {
2907
2908                                 # Do not delete if it is the last alignment of
2909                                 # multiple tokens; this will prevent some
2910                                 # undesirable alignments
2911                                 if ( $imax > 0 && $i == $imax ) {
2912                                     $delete_me = 0;
2913                                 }
2914
2915                                 # Otherwise, set a flag to delete most
2916                                 # remaining tokens
2917                                 else { $deleted_assignment_token = $raw_tok }
2918                             }
2919                         }
2920                     }
2921
2922                     # Do not let a user exclusion be reactivated by above rules
2923                     $delete_me ||= !$align_ok;
2924
2925                     #####################################
2926                     # Add this token to the deletion list
2927                     #####################################
2928                     if ($delete_me) {
2929                         push @idel, $i;
2930
2931                         # update deletion propagation flags
2932                         if ( !defined($delete_above_level)
2933                             || $lev < $delete_above_level )
2934                         {
2935
2936                             # delete all following higher level alignments
2937                             $delete_above_level = $lev;
2938
2939                             # but keep deleting after => to next lower level
2940                             # to avoid some bizarre alignments
2941                             if ( $raw_tok eq '=>' ) {
2942                                 $delete_above_level = $lev - 1;
2943                             }
2944                         }
2945                     }
2946                 }    # End loop over alignment tokens
2947
2948                 # Process all deletion requests for this line
2949                 if (@idel) {
2950                     delete_selected_tokens( $line, \@idel );
2951                 }
2952             }    # End loopover lines
2953         }    # End loop over subgroups
2954
2955         #################################################
2956         # PASS 2 over subgroups to remove null alignments
2957         #################################################
2958
2959         # This pass is only used for testing. It is helping to identify
2960         # alignment situations which might be improved with a future more
2961         # general algorithm which adds a tail matching capability.
2962         if (TEST_DELETE_NULL) {
2963             delete_null_alignments( $rnew_lines, $rline_hashes, \@subgroups )
2964               if ($saw_large_group);
2965         }
2966
2967         # PASS 3: Construct a tree of matched lines and delete some small deeper
2968         # levels of tokens.  They also block good alignments.
2969         prune_alignment_tree($rnew_lines) if ($max_lev_diff);
2970
2971         # PASS 4: compare all lines for common tokens
2972         match_line_pairs( $rlines, $rnew_lines, \@subgroups, $group_level );
2973
2974         return ( $max_lev_diff, $saw_side_comment );
2975     }
2976 }
2977
2978 sub delete_null_alignments {
2979     my ( $rnew_lines, $rline_hashes, $rsubgroups ) = @_;
2980
2981     # This is an optional second pass for deleting alignment tokens which can
2982     # occasionally improve alignment.  We look for and remove 'null
2983     # alignments', which are alignments that require no padding.  So we can
2984     # 'cheat' and delete them. For example, notice the '=~' alignment in the
2985     # first two lines of the following code:
2986
2987     #    $sysname .= 'del' if $self->label =~ /deletion/;
2988     #    $sysname .= 'ins' if $self->label =~ /insertion/;
2989     #    $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq;
2990
2991     # These '=~' tokens are already aligned because they are both the same
2992     # distance from the previous alignment token, the 'if'.  So we can
2993     # eliminate them as alignments.  The advantage is that in some cases, such
2994     # as this one, this will allow other tokens to be aligned. In this case we
2995     # then get the 'if' tokens to align:
2996
2997     #   $sysname .= 'del'                     if $self->label =~ /deletion/;
2998     #   $sysname .= 'ins'                     if $self->label =~ /insertion/;
2999     #   $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq;
3000
3001     # The following rules for limiting this operation have been found to
3002     # work well and avoid problems:
3003
3004     # Rule 1. We only consider a sequence of lines which have the same
3005     # sequence of alignment tokens.
3006
3007     # Rule 2. We never eliminate the first alignment token.  One reason is that
3008     # lines may have different leading indentation spaces, so keeping the
3009     # first alignment token insures that our length measurements start at
3010     # a well-defined point.  Another reason is that nothing is gained because
3011     # the left-to-right sweep can always handle alignment of this token.
3012
3013     # Rule 3. We require that the first alignment token exist in either
3014     # a previous line or a subsequent line.  The reason is that this avoids
3015     # changing two-line matches which go through special logic.
3016
3017     # Rule 4. Do not delete a token which occurs in a previous or subsequent
3018     # line. For example, in the above example, it was ok to eliminate the '=~'
3019     # token from two lines because it did not occur in a surrounding line.
3020     # If it did occur in a surrounding line, the result could be confusing
3021     # or even incorrectly aligned.
3022
3023     # A consequence of these rules is that we only need to consider subgroups
3024     # with at least 3 lines and 2 alignment tokens.
3025
3026     # The subgroup line index range
3027     my ( $jbeg, $jend );
3028
3029     # Vars to keep track of the start of a current sequence of matching
3030     # lines.
3031     my $rtokens_match;
3032     my $rfield_lengths_match;
3033     my $j_match_beg;
3034     my $j_match_end;
3035     my $imax_match;
3036     my $rneed_pad;
3037
3038     # Vars for a line being tested
3039     my $rtokens;
3040     my $rfield_lengths;
3041     my $imax;
3042
3043     my $start_match = sub {
3044         my ($jj) = @_;
3045         $rtokens_match        = $rtokens;
3046         $rfield_lengths_match = $rfield_lengths;
3047         $j_match_beg          = $jj;
3048         $j_match_end          = $jj;
3049         $imax_match           = $imax;
3050         $rneed_pad            = [];
3051         return;
3052     };
3053
3054     my $add_to_match = sub {
3055         my ($jj) = @_;
3056         $j_match_end = $jj;
3057
3058         # Keep track of any padding that would be needed for each token
3059         for ( my $i = 0 ; $i <= $imax ; $i++ ) {
3060             next if ( $rneed_pad->[$i] );
3061             my $length       = $rfield_lengths->[$i];
3062             my $length_match = $rfield_lengths_match->[$i];
3063             if ( $length ne $length_match ) { $rneed_pad->[$i] = 1 }
3064         }
3065         return;
3066     };
3067
3068     my $end_match = sub {
3069         return unless ( $j_match_end > $j_match_beg );
3070         my $nlines    = $j_match_end - $j_match_beg + 1;
3071         my $rhash_beg = $rline_hashes->[$j_match_beg];
3072         my $rhash_end = $rline_hashes->[$j_match_end];
3073         my @idel;
3074
3075         # Do not delete unless the first token also occurs in a surrounding line
3076         my $tok0 = $rtokens_match->[0];
3077         return
3078           unless (
3079             (
3080                    $j_match_beg > $jbeg
3081                 && $rnew_lines->[ $j_match_beg - 1 ]->get_rtokens()->[0] eq
3082                 $tok0
3083             )
3084             || (   $j_match_end < $jend
3085                 && $rnew_lines->[ $j_match_end + 1 ]->get_rtokens()->[0] eq
3086                 $tok0 )
3087           );
3088
3089         # Note that we are skipping the token at i=0
3090         for ( my $i = 1 ; $i <= $imax_match ; $i++ ) {
3091
3092             # do not delete a token which requires padding to align
3093             next if ( $rneed_pad->[$i] );
3094
3095             my $tok = $rtokens_match->[$i];
3096
3097             # Do not delete a token which occurs in a surrounding line
3098             next
3099               if ( $j_match_beg > $jbeg
3100                 && defined( $rline_hashes->[ $j_match_beg - 1 ]->{$tok} ) );
3101             next
3102               if ( $j_match_end < $jend
3103                 && defined( $rline_hashes->[ $j_match_end + 1 ]->{$tok} ) );
3104
3105             # ok to delete
3106             push @idel, $i;
3107             ##print "ok to delete tok=$tok\n";
3108         }
3109         if (@idel) {
3110             foreach my $j ( $j_match_beg .. $j_match_end ) {
3111                 delete_selected_tokens( $rnew_lines->[$j], \@idel );
3112             }
3113         }
3114         return;
3115     };
3116
3117     foreach my $item ( @{$rsubgroups} ) {
3118         ( $jbeg, $jend ) = @{$item};
3119         my $nlines = $jend - $jbeg + 1;
3120         next unless ( $nlines > 2 );
3121
3122         for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
3123             my $line = $rnew_lines->[$jj];
3124             $rtokens        = $line->get_rtokens();
3125             $rfield_lengths = $line->get_rfield_lengths();
3126             $imax           = @{$rtokens} - 2;
3127
3128             # start a new match group
3129             if ( $jj == $jbeg ) {
3130                 $start_match->($jj);
3131                 next;
3132             }
3133
3134             # see if all tokens of this line match the current group
3135             my $match;
3136             if ( $imax == $imax_match ) {
3137                 for ( my $i = 0 ; $i <= $imax ; $i++ ) {
3138                     my $tok       = $rtokens->[$i];
3139                     my $tok_match = $rtokens_match->[$i];
3140                     last if ( $tok ne $tok_match );
3141                 }
3142                 $match = 1;
3143             }
3144
3145             # yes, they all match
3146             if ($match) {
3147                 $add_to_match->($jj);
3148             }
3149
3150             # now, this line does not match
3151             else {
3152                 $end_match->();
3153                 $start_match->($jj);
3154             }
3155         }    # End loopover lines
3156         $end_match->();
3157     }    # End loop over subgroups
3158     return;
3159 } ## end sub delete_null_alignments
3160
3161 sub match_line_pairs {
3162     my ( $rlines, $rnew_lines, $rsubgroups, $group_level ) = @_;
3163
3164     # Compare each pair of lines and save information about common matches
3165     # $rlines     = list of lines including hanging side comments
3166     # $rnew_lines = list of lines without any hanging side comments
3167     # $rsubgroups = list of subgroups of the new lines
3168
3169     # TODO:
3170     # Maybe change: imax_pair => pair_match_info = ref to array
3171     #  = [$imax_align, $rMsg, ... ]
3172     #  This may eventually have multi-level match info
3173
3174     # Previous line vars
3175     my ( $line_m, $rtokens_m, $rpatterns_m, $rfield_lengths_m, $imax_m,
3176         $list_type_m, $ci_level_m );
3177
3178     # Current line vars
3179     my ( $line, $rtokens, $rpatterns, $rfield_lengths, $imax, $list_type,
3180         $ci_level );
3181
3182     use constant EXPLAIN_COMPARE_PATTERNS => 0;
3183
3184     my $compare_patterns = sub {
3185
3186         # helper routine to decide if patterns match well enough..
3187         # return code:
3188         #   0 = patterns match, continue
3189         #   1 = no match
3190         #   2 = no match, and lines do not match at all
3191
3192         my ( $tok, $tok_m, $pat, $pat_m, $pad ) = @_;
3193         my $GoToMsg     = "";
3194         my $return_code = 1;
3195
3196         my ( $alignment_token, $lev, $tag, $tok_count ) =
3197           decode_alignment_token($tok);
3198
3199         # We have to be very careful about aligning commas
3200         # when the pattern's don't match, because it can be
3201         # worse to create an alignment where none is needed
3202         # than to omit one.  Here's an example where the ','s
3203         # are not in named containers.  The first line below
3204         # should not match the next two:
3205         #   ( $a, $b ) = ( $b, $r );
3206         #   ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
3207         #   ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
3208         if ( $alignment_token eq ',' ) {
3209
3210             # do not align commas unless they are in named
3211             # containers
3212             $GoToMsg = "do not align commas in unnamed containers";
3213             goto NO_MATCH unless ( $tok =~ /[A-Za-z]/ );
3214         }
3215
3216         # do not align parens unless patterns match;
3217         # large ugly spaces can occur in math expressions.
3218         elsif ( $alignment_token eq '(' ) {
3219
3220             # But we can allow a match if the parens don't
3221             # require any padding.
3222             $GoToMsg = "do not align '(' unless patterns match or pad=0";
3223             if ( $pad != 0 ) { goto NO_MATCH }
3224         }
3225
3226         # Handle an '=' alignment with different patterns to
3227         # the left.
3228         elsif ( $alignment_token eq '=' ) {
3229
3230             # It is best to be a little restrictive when
3231             # aligning '=' tokens.  Here is an example of
3232             # two lines that we will not align:
3233             #       my $variable=6;
3234             #       $bb=4;
3235             # The problem is that one is a 'my' declaration,
3236             # and the other isn't, so they're not very similar.
3237             # We will filter these out by comparing the first
3238             # letter of the pattern.  This is crude, but works
3239             # well enough.
3240             if ( substr( $pat_m, 0, 1 ) ne substr( $pat, 0, 1 ) ) {
3241                 $GoToMsg = "first character before equals differ";
3242                 goto NO_MATCH;
3243             }
3244
3245             # The introduction of sub 'prune_alignment_tree'
3246             # enabled alignment of lists left of the equals with
3247             # other scalar variables. For example:
3248             # my ( $D, $s, $e ) = @_;
3249             # my $d             = length $D;
3250             # my $c             = $e - $s - $d;
3251
3252             # But this would change formatting of a lot of scripts,
3253             # so for now we prevent alignment of comma lists on the
3254             # left with scalars on the left.  We will also prevent
3255             # any partial alignments.
3256
3257           # set return code 2 if the = is at line level, but
3258           # set return code 1 if the = is below line level, i.e.
3259           #    sub new { my ( $p, $v ) = @_; bless \$v, $p }
3260           #    sub iter { my ($x) = @_; return undef if $$x < 0; return $$x--; }
3261
3262             elsif (
3263                 ( index( $pat_m, ',' ) >= 0 ) ne ( index( $pat, ',' ) >= 0 ) )
3264             {
3265                 $GoToMsg = "mixed commas/no-commas before equals";
3266                 if ( $lev eq $group_level ) {
3267                     $return_code = 2;
3268                 }
3269                 goto NO_MATCH;
3270             }
3271         }
3272
3273       MATCH:
3274         return ( 0, \$GoToMsg );
3275
3276       NO_MATCH:
3277
3278         EXPLAIN_COMPARE_PATTERNS
3279           && print STDERR "no match because $GoToMsg\n";
3280
3281         return ( $return_code, \$GoToMsg );
3282
3283     };    ## end of $compare_patterns->()
3284
3285     # loop over subgroups
3286     foreach my $item ( @{$rsubgroups} ) {
3287         my ( $jbeg, $jend ) = @{$item};
3288         my $nlines = $jend - $jbeg + 1;
3289         next unless ( $nlines > 1 );
3290
3291         # loop over lines in a subgroup
3292         for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
3293
3294             $line_m           = $line;
3295             $rtokens_m        = $rtokens;
3296             $rpatterns_m      = $rpatterns;
3297             $rfield_lengths_m = $rfield_lengths;
3298             $imax_m           = $imax;
3299             $list_type_m      = $list_type;
3300             $ci_level_m       = $ci_level;
3301
3302             $line           = $rnew_lines->[$jj];
3303             $rtokens        = $line->get_rtokens();
3304             $rpatterns      = $line->get_rpatterns();
3305             $rfield_lengths = $line->get_rfield_lengths();
3306             $imax           = @{$rtokens} - 2;
3307             $list_type      = $line->get_list_type();
3308             $ci_level       = $line->get_ci_level();
3309
3310             # nothing to do for first line
3311             next if ( $jj == $jbeg );
3312
3313             my $ci_jump = $ci_level - $ci_level_m;
3314
3315             my $imax_min = $imax_m < $imax ? $imax_m : $imax;
3316
3317             my $imax_align = -1;
3318
3319             # find number of leading common tokens
3320
3321             #################################
3322             # No match to hanging side comment
3323             #################################
3324             if ( $line->get_is_hanging_side_comment() ) {
3325
3326                 # Should not get here; HSC's have been filtered out
3327                 $imax_align = -1;
3328             }
3329
3330             ##############################
3331             # Handle comma-separated lists
3332             ##############################
3333             elsif ( $list_type && $list_type eq $list_type_m ) {
3334
3335                 # do not align lists across a ci jump with new list method
3336                 if ($ci_jump) { $imax_min = -1 }
3337
3338                 my $i_nomatch = $imax_min + 1;
3339                 for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
3340                     my $tok   = $rtokens->[$i];
3341                     my $tok_m = $rtokens_m->[$i];
3342                     if ( $tok ne $tok_m ) {
3343                         $i_nomatch = $i;
3344                         last;
3345                     }
3346                 }
3347
3348                 $imax_align = $i_nomatch - 1;
3349             }
3350
3351             ##################
3352             # Handle non-lists
3353             ##################
3354             else {
3355                 my $i_nomatch = $imax_min + 1;
3356                 for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
3357                     my $tok   = $rtokens->[$i];
3358                     my $tok_m = $rtokens_m->[$i];
3359                     if ( $tok ne $tok_m ) {
3360                         $i_nomatch = $i;
3361                         last;
3362                     }
3363
3364                     my $pat   = $rpatterns->[$i];
3365                     my $pat_m = $rpatterns_m->[$i];
3366
3367                     # If patterns don't match, we have to be careful...
3368                     if ( $pat_m ne $pat ) {
3369                         my $pad =
3370                           $rfield_lengths->[$i] - $rfield_lengths_m->[$i];
3371                         my ( $match_code, $rmsg ) = $compare_patterns->(
3372                             $tok, $tok_m, $pat, $pat_m, $pad
3373                         );
3374                         if ($match_code) {
3375                             if    ( $match_code eq 1 ) { $i_nomatch = $i }
3376                             elsif ( $match_code eq 2 ) { $i_nomatch = 0 }
3377                             last;
3378                         }
3379                     }
3380                 }
3381                 $imax_align = $i_nomatch - 1;
3382             }
3383
3384             $line_m->set_imax_pair($imax_align);
3385
3386         } ## end loop over lines
3387
3388         # Put fence at end of subgroup
3389         $line->set_imax_pair(-1);
3390
3391     } ## end loop over subgroups
3392
3393     # if there are hanging side comments, propagate the pair info down to them
3394     # so that lines can just look back one line for their pair info.
3395     if ( @{$rlines} > @{$rnew_lines} ) {
3396         my $last_pair_info = -1;
3397         foreach my $line ( @{$rlines} ) {
3398             if ( $line->get_is_hanging_side_comment() ) {
3399                 $line->set_imax_pair($last_pair_info);
3400             }
3401             else {
3402                 $last_pair_info = $line->get_imax_pair();
3403             }
3404         }
3405     }
3406     return;
3407 }
3408
3409 sub fat_comma_to_comma {
3410     my ($str) = @_;
3411
3412     # We are changing '=>' to ',' and removing any trailing decimal count
3413     # because currently fat commas have a count and commas do not.
3414     # For example, we will change '=>2+{-3.2' into ',2+{-3'
3415     if ( $str =~ /^=>([^\.]*)/ ) { $str = ',' . $1 }
3416     return $str;
3417 }
3418
3419 sub get_line_token_info {
3420
3421     # scan lines of tokens and return summary information about the range of
3422     # levels and patterns.
3423     my ($rlines) = @_;
3424
3425     # First scan to check monotonicity. Here is an example of several
3426     # lines which are monotonic. The = is the lowest level, and
3427     # the commas are all one level deeper. So this is not nonmonotonic.
3428     #  $$d{"weeks"}   = [ "w",  "wk",  "wks", "week", "weeks" ];
3429     #  $$d{"days"}    = [ "d",  "day", "days" ];
3430     #  $$d{"hours"}   = [ "h",  "hr",  "hrs", "hour", "hours" ];
3431     my @all_token_info;
3432     my $all_monotonic = 1;
3433     for ( my $jj = 0 ; $jj < @{$rlines} ; $jj++ ) {
3434         my ($line) = $rlines->[$jj];
3435         my $rtokens = $line->get_rtokens();
3436         my $last_lev;
3437         my $is_monotonic = 1;
3438         my $i            = -1;
3439         foreach my $tok ( @{$rtokens} ) {
3440             $i++;
3441             my ( $raw_tok, $lev, $tag, $tok_count ) =
3442               decode_alignment_token($tok);
3443             push @{ $all_token_info[$jj] },
3444               [ $raw_tok, $lev, $tag, $tok_count ];
3445             last if ( $tok eq '#' );
3446             if ( $i > 0 && $lev < $last_lev ) { $is_monotonic = 0 }
3447             $last_lev = $lev;
3448         }
3449         if ( !$is_monotonic ) { $all_monotonic = 0 }
3450     }
3451
3452     my $rline_values = [];
3453     for ( my $jj = 0 ; $jj < @{$rlines} ; $jj++ ) {
3454         my ($line) = $rlines->[$jj];
3455
3456         my $rtokens = $line->get_rtokens();
3457         my $i       = -1;
3458         my ( $lev_min, $lev_max );
3459         my $token_pattern_max = "";
3460         my %saw_level;
3461         my @token_info;
3462         my $is_monotonic = 1;
3463
3464         # find the index of the last token before the side comment
3465         my $imax      = @{$rtokens} - 2;
3466         my $imax_true = $imax;
3467
3468         # If the entire group is monotonic, and the line ends in a comma list,
3469         # walk it back to the first such comma. this will have the effect of
3470         # making all trailing ragged comma lists match in the prune tree
3471         # routine.  these trailing comma lists can better be handled by later
3472         # alignment rules.
3473
3474         # Treat fat commas the same as commas here by converting them to
3475         # commas.  This will improve the chance of aligning the leading parts
3476         # of ragged lists.
3477
3478         my $tok_end = fat_comma_to_comma( $rtokens->[$imax] );
3479         if ( $all_monotonic && $tok_end =~ /^,/ ) {
3480             my $i = $imax - 1;
3481             while ( $i >= 0
3482                 && fat_comma_to_comma( $rtokens->[$i] ) eq $tok_end )
3483             {
3484                 $imax = $i;
3485                 $i--;
3486             }
3487         }
3488
3489         # make a first pass to find level range
3490         my $last_lev;
3491         foreach my $tok ( @{$rtokens} ) {
3492             $i++;
3493             last if ( $i > $imax );
3494             last if ( $tok eq '#' );
3495             my ( $raw_tok, $lev, $tag, $tok_count ) =
3496               @{ $all_token_info[$jj]->[$i] };
3497
3498             last if ( $tok eq '#' );
3499             $token_pattern_max .= $tok;
3500             $saw_level{$lev}++;
3501             if ( !defined($lev_min) ) {
3502                 $lev_min = $lev;
3503                 $lev_max = $lev;
3504             }
3505             else {
3506                 if ( $lev < $lev_min )  { $lev_min      = $lev; }
3507                 if ( $lev > $lev_max )  { $lev_max      = $lev; }
3508                 if ( $lev < $last_lev ) { $is_monotonic = 0 }
3509             }
3510             $last_lev = $lev;
3511         }
3512
3513         # handle no levels
3514         my $rtoken_patterns = {};
3515         my $rtoken_indexes  = {};
3516         my @levs            = sort keys %saw_level;
3517         if ( !defined($lev_min) ) {
3518             $lev_min                     = -1;
3519             $lev_max                     = -1;
3520             $levs[0]                     = -1;
3521             $rtoken_patterns->{$lev_min} = "";
3522             $rtoken_indexes->{$lev_min}  = [];
3523         }
3524
3525         # handle one level
3526         elsif ( $lev_max == $lev_min ) {
3527             $rtoken_patterns->{$lev_max} = $token_pattern_max;
3528             $rtoken_indexes->{$lev_max}  = [ ( 0 .. $imax ) ];
3529         }
3530
3531         # handle multiple levels
3532         else {
3533             $rtoken_patterns->{$lev_max} = $token_pattern_max;
3534             $rtoken_indexes->{$lev_max}  = [ ( 0 .. $imax ) ];
3535
3536             my $debug   = 0;
3537             my $lev_top = pop @levs;    # alread did max level
3538             my $itok    = -1;
3539             foreach my $tok ( @{$rtokens} ) {
3540                 $itok++;
3541                 last if ( $itok > $imax );
3542                 my ( $raw_tok, $lev, $tag, $tok_count ) =
3543                   @{ $all_token_info[$jj]->[$itok] };
3544                 last if ( $raw_tok eq '#' );
3545                 foreach my $lev_test (@levs) {
3546                     next if ( $lev > $lev_test );
3547                     $rtoken_patterns->{$lev_test} .= $tok;
3548                     push @{ $rtoken_indexes->{$lev_test} }, $itok;
3549                 }
3550             }
3551             push @levs, $lev_top;
3552         }
3553
3554         push @{$rline_values},
3555           [
3556             $lev_min,        $lev_max,      $rtoken_patterns, \@levs,
3557             $rtoken_indexes, $is_monotonic, $imax_true,       $imax,
3558           ];
3559
3560         # debug
3561         0 && do {
3562             local $" = ')(';
3563             print "lev_min=$lev_min, lev_max=$lev_max, levels=(@levs)\n";
3564             foreach my $key ( sort keys %{$rtoken_patterns} ) {
3565                 print "$key => $rtoken_patterns->{$key}\n";
3566                 print "$key => @{$rtoken_indexes->{$key}}\n";
3567             }
3568         };
3569     } ## end loop over lines
3570     return ( $rline_values, $all_monotonic );
3571 }
3572
3573 sub prune_alignment_tree {
3574     my ($rlines) = @_;
3575     my $jmax = @{$rlines} - 1;
3576     return unless $jmax > 0;
3577
3578     # Vertical alignment in perltidy is done as an iterative process.  The
3579     # starting point is to mark all possible alignment tokens ('=', ',', '=>',
3580     # etc) for vertical alignment.  Then we have to delete all alignments
3581     # which, if actually made, would detract from overall alignment.  This
3582     # is done in several phases of which this is one.
3583
3584     # In this routine we look at the alignments of a group of lines as a
3585     # hierarchical tree.  We will 'prune' the tree to limited depths if that
3586     # will improve overall alignment at the lower depths.
3587     # For each line we will be looking at its alignment patterns down to
3588     # different fixed depths. For each depth, we include all lower depths and
3589     # ignore all higher depths.  We want to see if we can get alignment of a
3590     # larger group of lines if we ignore alignments at some lower depth.
3591     # Here is an # example:
3592
3593     # for (
3594     #     [ '$var',     sub { join $_, "bar" },            0, "bar" ],
3595     #     [ 'CONSTANT', sub { join "foo", "bar" },         0, "bar" ],
3596     #     [ 'CONSTANT', sub { join "foo", "bar", 3 },      1, "barfoo3" ],
3597     #     [ '$myvar',   sub { my $var; join $var, "bar" }, 0, "bar" ],
3598     # );
3599
3600     # In the above example, all lines have three commas at the lowest depth
3601     # (zero), so if there were no other alignements, these lines would all
3602     # align considering only the zero depth alignment token.  But some lines
3603     # have additional comma alignments at the next depth, so we need to decide
3604     # if we should drop those to keep the top level alignments, or keep those
3605     # for some additional low level alignments at the expense losing some top
3606     # level alignments.  In this case we will drop the deeper level commas to
3607     # keep the entire collection aligned.  But in some cases the decision could
3608     # go the other way.
3609
3610     # The tree for this example at the zero depth has one node containing
3611     # all four lines, since they are identical at zero level (three commas).
3612     # At depth one, there are three 'children' nodes, namely:
3613     # - lines 1 and 2, which have a single comma in the 'sub' at depth 1
3614     # - line 3, which has 2 commas at depth 1
3615     # - line4, which has a ';' and a ',' at depth 1
3616     # There are no deeper alignments in this example.
3617     # so the tree structure for this example is:
3618     #
3619     #    depth 0         depth 1      depth 2
3620     #    [lines 1-4] --  [line 1-2] -  (empty)
3621     #                 |  [line 3]   -  (empty)
3622     #                 |  [line 4]   -  (empty)
3623
3624     # We can carry this to any depth, but it is not really useful to go below
3625     # depth 2. To cleanly stop there, we will consider depth 2 to contain all
3626     # alignments at depth >=2.
3627
3628     use constant EXPLAIN_PRUNE => 0;
3629
3630     ####################################################################
3631     # Prune Tree Step 1. Start by scanning the lines and collecting info
3632     ####################################################################
3633
3634     # Note that the caller had this info but we have to redo this now because
3635     # alignment tokens may have been deleted.
3636     my ( $rline_values, $all_monotonic ) = get_line_token_info($rlines);
3637
3638     # If all the lines have levels which increase monotonically from left to
3639     # right, then the sweep-left-to-right pass can do a better job of alignment
3640     # than pruning, and without deleting alignments.
3641     return if ($all_monotonic);
3642
3643     # Contents of $rline_values
3644     #   [
3645     #     $lev_min,        $lev_max,      $rtoken_patterns, \@levs,
3646     #     $rtoken_indexes, $is_monotonic, $imax_true,       $imax,
3647     #   ];
3648
3649     # We can work to any depth, but there is little advantage to working
3650     # to a a depth greater than 2
3651     my $MAX_DEPTH = 2;
3652
3653     # This arrays will hold the tree of alignment tokens at different depths
3654     # for these lines.
3655     my @match_tree;
3656
3657     # Tree nodes contain these values:
3658     # $match_tree[$depth] = [$jbeg, $jend, $n_parent, $level, $pattern,
3659     #                        $nc_beg_p, $nc_end_p, $rindexes];
3660     # where
3661     #      $depth = 0,1,2 = index of depth of the match
3662
3663     #  $jbeg beginning index j of the range of lines in this match
3664     #  $jend ending index j of the range of lines in this match
3665     #  $n_parent = index of the containing group at $depth-1, if it exists
3666     #  $level = actual level of code being matched in this group
3667     #  $pattern = alignment pattern being matched
3668     #  $nc_beg_p = first child
3669     #  $nc_end_p = last child
3670     #  $rindexes = ref to token indexes
3671
3672     # the patterns and levels of the current group being formed at each depth
3673     my ( @token_patterns_current, @levels_current, @token_indexes_current );
3674
3675     # the patterns and levels of the next line being tested at each depth
3676     my ( @token_patterns_next, @levels_next, @token_indexes_next );
3677
3678     #########################################################
3679     # define a recursive worker subroutine for tree construction
3680     #########################################################
3681
3682     # This is a recursive routine which is called if a match condition changes
3683     # at any depth when a new line is encountered.  It ends the match node
3684     # which changed plus all deeper nodes attached to it.
3685     my $end_node;
3686     $end_node = sub {
3687         my ( $depth, $jl, $n_parent ) = @_;
3688
3689         # $depth is the tree depth
3690         # $jl is the  index of the line
3691         # $n_parent is index of the parent node of this node
3692
3693         return if ( $depth > $MAX_DEPTH );
3694
3695         # end any current group at this depth
3696         if (   $jl >= 0
3697             && defined( $match_tree[$depth] )
3698             && @{ $match_tree[$depth] }
3699             && defined( $levels_current[$depth] ) )
3700         {
3701             $match_tree[$depth]->[-1]->[1] = $jl;
3702         }
3703
3704         # Define the index of the node we will create below
3705         my $ng_self = 0;
3706         if ( defined( $match_tree[$depth] ) ) {
3707             $ng_self = @{ $match_tree[$depth] };
3708         }
3709
3710         # end any next deeper child node(s)
3711         $end_node->( $depth + 1, $jl, $ng_self );
3712
3713         # update the levels being matched
3714         $token_patterns_current[$depth] = $token_patterns_next[$depth];
3715         $token_indexes_current[$depth]  = $token_indexes_next[$depth];
3716         $levels_current[$depth]         = $levels_next[$depth];
3717
3718         # Do not start a new group at this level if it is not being used
3719         if ( !defined( $levels_next[$depth] )
3720             || $depth > 0
3721             && $levels_next[$depth] <= $levels_next[ $depth - 1 ] )
3722         {
3723             return;
3724         }
3725
3726         # Create a node for the next group at this depth. We initially assume
3727         # that it will continue to $jmax, and correct that later if the node
3728         # ends earlier.
3729         push @{ $match_tree[$depth] },
3730           [
3731             $jl + 1, $jmax, $n_parent, $levels_current[$depth],
3732             $token_patterns_current[$depth],
3733             undef, undef, $token_indexes_current[$depth],
3734           ];
3735
3736         return;
3737     };    ## end sub end_node
3738
3739     ######################################################
3740     # Prune Tree Step 2. Loop to form the tree of matches.
3741     ######################################################
3742     for ( my $jp = 0 ; $jp <= $jmax ; $jp++ ) {
3743
3744         # working with two adjacent line indexes, 'm'=minus, 'p'=plus
3745         my $jm = $jp - 1;
3746
3747         # Pull out needed values for the next line
3748         my ( $lev_min, $lev_max, $rtoken_patterns, $rlevs, $rtoken_indexes,
3749             $is_monotonic, $imax_true, $imax )
3750           = @{ $rline_values->[$jp] };
3751
3752         # Transfer levels and patterns for this line to the working arrays.
3753         # If the number of levels differs from our chosen MAX_DEPTH ...
3754         # if fewer than MAX_DEPTH: leave levels at missing depths undefined
3755         # if more than MAX_DEPTH: set the MAX_DEPTH level to be the maximum
3756         @levels_next = @{$rlevs}[ 0 .. $MAX_DEPTH ];
3757         if ( @{$rlevs} > $MAX_DEPTH ) {
3758             $levels_next[$MAX_DEPTH] = $rlevs->[-1];
3759         }
3760         my $depth = 0;
3761         foreach (@levels_next) {
3762             $token_patterns_next[$depth] =
3763               defined($_) ? $rtoken_patterns->{$_} : undef;
3764             $token_indexes_next[$depth] =
3765               defined($_) ? $rtoken_indexes->{$_} : undef;
3766             $depth++;
3767         }
3768
3769         # Look for a change in match groups...
3770
3771         # Initialize on the first line
3772         if ( $jp == 0 ) {
3773             my $n_parent;
3774             $end_node->( 0, $jm, $n_parent );
3775         }
3776
3777         # End groups if a hard flag has been set
3778         elsif ( $rlines->[$jm]->get_end_group() ) {
3779             my $n_parent;
3780             $end_node->( 0, $jm, $n_parent );
3781         }
3782
3783         # Continue at hanging side comment
3784         elsif ( $rlines->[$jp]->get_is_hanging_side_comment() ) {
3785             next;
3786         }
3787
3788         # Otherwise see if anything changed and update the tree if so
3789         else {
3790             foreach my $depth ( 0 .. $MAX_DEPTH ) {
3791
3792                 my $def_current = defined( $token_patterns_current[$depth] );
3793                 my $def_next    = defined( $token_patterns_next[$depth] );
3794                 last unless ( $def_current || $def_next );
3795                 if (   !$def_current
3796                     || !$def_next
3797                     || $token_patterns_current[$depth] ne
3798                     $token_patterns_next[$depth] )
3799                 {
3800                     my $n_parent;
3801                     if ( $depth > 0 && defined( $match_tree[ $depth - 1 ] ) ) {
3802                         $n_parent = @{ $match_tree[ $depth - 1 ] } - 1;
3803                     }
3804                     $end_node->( $depth, $jm, $n_parent );
3805                     last;
3806                 }
3807             }
3808         }
3809     } ## end loop to form tree of matches
3810
3811     ##########################################################
3812     # Prune Tree Step 3. Make links from parent to child nodes
3813     ##########################################################
3814
3815     # It seemed cleaner to do this as a separate step rather than during tree
3816     # construction.  The children nodes have links up to the parent node which
3817     # created them.  Now make links in the opposite direction, so the parents
3818     # can find the children.  We store the range of children nodes ($nc_beg,
3819     # $nc_end) of each parent with two additional indexes in the orignal array.
3820     # These will be undef if no children.
3821     for ( my $depth = $MAX_DEPTH ; $depth > 0 ; $depth-- ) {
3822         next unless defined( $match_tree[$depth] );
3823         my $nc_max = @{ $match_tree[$depth] } - 1;
3824         my $np_now;
3825         foreach my $nc ( 0 .. $nc_max ) {
3826             my $np = $match_tree[$depth]->[$nc]->[2];
3827             if ( !defined($np) ) {
3828
3829                 # shouldn't happen
3830                 #print STDERR "lost child $np at depth $depth\n";
3831                 next;
3832             }
3833             if ( !defined($np_now) || $np != $np_now ) {
3834                 $np_now = $np;
3835                 $match_tree[ $depth - 1 ]->[$np]->[5] = $nc;
3836             }
3837             $match_tree[ $depth - 1 ]->[$np]->[6] = $nc;
3838         }
3839     } ## end loop to make links down to the child nodes
3840
3841     EXPLAIN_PRUNE > 0 && do {
3842         print "Tree complete. Found these groups:\n";
3843         foreach my $depth ( 0 .. $MAX_DEPTH ) {
3844             Dump_tree_groups( \@{ $match_tree[$depth] }, "depth=$depth" );
3845         }
3846     };
3847
3848     #######################################################
3849     # Prune Tree Step 4. Make a list of nodes to be deleted
3850     #######################################################
3851
3852     #  list of lines with tokens to be deleted:
3853     #  [$jbeg, $jend, $level_keep]
3854     #  $jbeg..$jend is the range of line indexes,
3855     #  $level_keep is the minimum level to keep
3856     my @delete_list;
3857
3858     #  Groups with ending comma lists and their range of sizes:
3859     #  $ragged_comma_group{$id} = [ imax_group_min, imax_group_max ]
3860     my %ragged_comma_group;
3861
3862     # Define a threshold line count for forcing a break
3863     my $nlines_break = 3;
3864
3865     # We work with a list of nodes to visit at the next deeper depth.
3866     my @todo_list;
3867     if ( defined( $match_tree[0] ) ) {
3868         @todo_list = ( 0 .. @{ $match_tree[0] } - 1 );
3869     }
3870
3871     for ( my $depth = 0 ; $depth <= $MAX_DEPTH ; $depth++ ) {
3872         last unless (@todo_list);
3873         my @todo_next;
3874         foreach my $np (@todo_list) {
3875             my ( $jbeg_p, $jend_p, $np_p, $lev_p, $pat_p, $nc_beg_p, $nc_end_p,
3876                 $rindexes_p )
3877               = @{ $match_tree[$depth]->[$np] };
3878             my $nlines_p = $jend_p - $jbeg_p + 1;
3879
3880             # nothing to do if no children
3881             next unless defined($nc_beg_p);
3882
3883             # Define the number of lines to either keep or delete a child node.
3884             # This is the key decision we have to make.  We want to delete
3885             # short runs of matched lines, and keep long runs.  It seems easier
3886             # for the eye to follow breaks in monotonic level changes than
3887             # non-monotonic level changes.  For example, the following looks
3888             # best if we delete the lower level alignments:
3889
3890             #  [1]                  ~~ [];
3891             #  [ ["foo"], ["bar"] ] ~~ [ qr/o/, qr/a/ ];
3892             #  [ qr/o/, qr/a/ ]     ~~ [ ["foo"], ["bar"] ];
3893             #  [ "foo", "bar" ]     ~~ [ qr/o/, qr/a/ ];
3894             #  [ qr/o/, qr/a/ ]     ~~ [ "foo", "bar" ];
3895             #  $deep1               ~~ $deep1;
3896
3897             # So we will use two thresholds.
3898             my $nmin_mono     = $depth + 2;
3899             my $nmin_non_mono = $depth + 6;
3900             if ( $nmin_mono > $nlines_p - 1 ) {
3901                 $nmin_mono = $nlines_p - 1;
3902             }
3903             if ( $nmin_non_mono > $nlines_p - 1 ) {
3904                 $nmin_non_mono = $nlines_p - 1;
3905             }
3906
3907             # loop to keep or delete each child node
3908             foreach my $nc ( $nc_beg_p .. $nc_end_p ) {
3909                 my ( $jbeg_c, $jend_c, $np_c, $lev_c, $pat_c, $nc_beg_c,
3910                     $nc_end_c )
3911                   = @{ $match_tree[ $depth + 1 ]->[$nc] };
3912                 my $nlines_c     = $jend_c - $jbeg_c + 1;
3913                 my $is_monotonic = $rline_values->[$jbeg_c]->[5];
3914                 my $nmin         = $is_monotonic ? $nmin_mono : $nmin_non_mono;
3915                 if ( $nlines_c < $nmin ) {
3916 ##print "deleting child, nlines=$nlines_c, nmin=$nmin\n";
3917                     push @delete_list, [ $jbeg_c, $jend_c, $lev_p ];
3918                 }
3919                 else {
3920 ##print "keeping child, nlines=$nlines_c, nmin=$nmin\n";
3921                     push @todo_next, $nc;
3922                 }
3923             }
3924         }
3925         @todo_list = @todo_next;
3926     } ## end loop to mark nodes to delete
3927
3928     #############################################################
3929     # Prune Tree Step 5. Loop to delete selected alignment tokens
3930     #############################################################
3931     foreach my $item (@delete_list) {
3932         my ( $jbeg, $jend, $level_keep ) = @{$item};
3933         foreach my $jj ( $jbeg .. $jend ) {
3934             my $line = $rlines->[$jj];
3935             my @idel;
3936             my $rtokens = $line->get_rtokens();
3937             my $imax    = @{$rtokens} - 2;
3938             for ( my $i = 0 ; $i <= $imax ; $i++ ) {
3939                 my $tok = $rtokens->[$i];
3940                 my ( $raw_tok, $lev, $tag, $tok_count ) =
3941                   decode_alignment_token($tok);
3942                 if ( $lev > $level_keep ) {
3943                     push @idel, $i;
3944                 }
3945             }
3946             if (@idel) {
3947                 delete_selected_tokens( $line, \@idel );
3948             }
3949         }
3950     } ## end loop to delete selected alignment tokens
3951
3952     return;
3953 } ## end sub prune_alignment_tree
3954
3955 sub Dump_tree_groups {
3956     my ( $rgroup, $msg ) = @_;
3957     print "$msg\n";
3958     local $" = ')(';
3959     foreach my $item ( @{$rgroup} ) {
3960         my @fix = @{$item};
3961         foreach (@fix) { $_ = "undef" unless defined $_; }
3962         $fix[4] = "...";
3963         print "(@fix)\n";
3964     }
3965     return;
3966 }
3967
3968 {    ## closure for sub is_marginal_match
3969
3970     my %is_if_or;
3971     my %is_assignment;
3972     my %is_good_alignment;
3973
3974     # This test did not give sufficiently better results to use as an update,
3975     # but the flag is worth keeping as a starting point for future testing.
3976     use constant TEST_MARGINAL_EQ_ALIGNMENT => 0;
3977
3978     BEGIN {
3979
3980         my @q = qw(
3981           if unless or ||
3982         );
3983         @is_if_or{@q} = (1) x scalar(@q);
3984
3985         @q = qw(
3986           = **= += *= &= <<= &&=
3987           -= /= |= >>= ||= //=
3988           .= %= ^=
3989           x=
3990         );
3991         @is_assignment{@q} = (1) x scalar(@q);
3992
3993         # Vertically aligning on certain "good" tokens is usually okay
3994         # so we can be less restrictive in marginal cases.
3995         @q = qw( { ? => = );
3996         push @q, (',');
3997         @is_good_alignment{@q} = (1) x scalar(@q);
3998     }
3999
4000     sub is_marginal_match {
4001
4002         my ( $line_0, $line_1, $group_level, $imax_align, $imax_prev ) = @_;
4003
4004         # Decide if we should undo some or all of the common alignments of a
4005         # group of just two lines.
4006
4007         # Given:
4008         #   $line_0 and $line_1 - the two lines
4009         #   $group_level = the indentation level of the group being processed
4010         #   $imax_align = the maximum index of the common alignment tokens
4011         #                 of the two lines
4012         #   $imax_prev  = the maximum index of the common alignment tokens
4013         #                 with the line before $line_0 (=-1 of does not exist)
4014
4015         # Return:
4016         #   $is_marginal = true if the two lines should NOT be fully aligned
4017         #                = false if the two lines can remain fully aligned
4018         #   $imax_align  = the index of the highest alignment token shared by
4019         #                  these two lines to keep if the match is marginal.
4020
4021         # When we have an alignment group of just two lines like this, we are
4022         # working in the twilight zone of what looks good and what looks bad.
4023         # This routine is a collection of rules which work have been found to
4024         # work fairly well, but it will need to be updated from time to time.
4025
4026         my $is_marginal = 0;
4027
4028         # always keep alignments of a terminal else or ternary
4029         goto RETURN if ( defined( $line_1->get_j_terminal_match() ) );
4030
4031         # always align lists
4032         my $group_list_type = $line_0->get_list_type();
4033         goto RETURN if ($group_list_type);
4034
4035         # always align hanging side comments
4036         my $is_hanging_side_comment = $line_1->get_is_hanging_side_comment();
4037         goto RETURN if ($is_hanging_side_comment);
4038
4039         my $jmax_0           = $line_0->get_jmax();
4040         my $jmax_1           = $line_1->get_jmax();
4041         my $rtokens_1        = $line_1->get_rtokens();
4042         my $rtokens_0        = $line_0->get_rtokens();
4043         my $rfield_lengths_0 = $line_0->get_rfield_lengths();
4044         my $rfield_lengths_1 = $line_1->get_rfield_lengths();
4045         my $rpatterns_0      = $line_0->get_rpatterns();
4046         my $rpatterns_1      = $line_1->get_rpatterns();
4047         my $imax_next        = $line_1->get_imax_pair();
4048
4049         # We will scan the alignment tokens and set a flag '$is_marginal' if
4050         # it seems that the an alignment would look bad.
4051         my $max_pad            = 0;
4052         my $saw_good_alignment = 0;
4053         my $saw_if_or;        # if we saw an 'if' or 'or' at group level
4054         my $raw_tokb = "";    # first token seen at group level
4055         my $jfirst_bad;
4056         my $line_ending_fat_comma;    # is last token just a '=>' ?
4057         my $j0_eq_pad;
4058         my $j0_max_pad = 0;
4059
4060         for ( my $j = 0 ; $j < $jmax_1 - 1 ; $j++ ) {
4061             my ( $raw_tok, $lev, $tag, $tok_count ) =
4062               decode_alignment_token( $rtokens_1->[$j] );
4063             if ( $raw_tok && $lev == $group_level ) {
4064                 if ( !$raw_tokb ) { $raw_tokb = $raw_tok }
4065                 $saw_if_or ||= $is_if_or{$raw_tok};
4066             }
4067
4068             # When the first of the two lines ends in a bare '=>' this will
4069             # probably be marginal match.  (For a bare =>, the next field length
4070             # will be 2 or 3, depending on side comment)
4071             $line_ending_fat_comma =
4072                  $j == $jmax_1 - 2
4073               && $raw_tok eq '=>'
4074               && $rfield_lengths_0->[ $j + 1 ] <= 3;
4075
4076             my $pad = $rfield_lengths_1->[$j] - $rfield_lengths_0->[$j];
4077             if ( $j == 0 ) {
4078                 $pad += $line_1->get_leading_space_count() -
4079                   $line_0->get_leading_space_count();
4080
4081                 # Remember the pad at a leading equals
4082                 if ( $raw_tok eq '=' && $lev == $group_level ) {
4083                     $j0_eq_pad = $pad;
4084                     $j0_max_pad =
4085                       0.5 * ( $rfield_lengths_1->[0] + $rfield_lengths_0->[0] );
4086                     $j0_max_pad = 4 if ( $j0_max_pad < 4 );
4087                 }
4088             }
4089
4090             if ( $pad < 0 )        { $pad     = -$pad }
4091             if ( $pad > $max_pad ) { $max_pad = $pad }
4092             if ( $is_good_alignment{$raw_tok} && !$line_ending_fat_comma ) {
4093                 $saw_good_alignment = 1;
4094             }
4095             else {
4096                 $jfirst_bad = $j unless defined($jfirst_bad);
4097             }
4098             if ( $rpatterns_0->[$j] ne $rpatterns_1->[$j] ) {
4099
4100                 # Flag this as a marginal match since patterns differ.
4101                 # Normally, we will not allow just two lines to match if
4102                 # marginal. But we can allow matching in some specific cases.
4103
4104                 $jfirst_bad  = $j if ( !defined($jfirst_bad) );
4105                 $is_marginal = 1  if ( $is_marginal == 0 );
4106                 if ( $raw_tok eq '=' ) {
4107
4108                     # Here is an example of a marginal match:
4109                     #       $done{$$op} = 1;
4110                     #       $op         = compile_bblock($op);
4111                     # The left tokens are both identifiers, but
4112                     # one accesses a hash and the other doesn't.
4113                     # We'll let this be a tentative match and undo
4114                     # it later if we don't find more than 2 lines
4115                     # in the group.
4116                     $is_marginal = 2;
4117                 }
4118             }
4119         }
4120
4121         $is_marginal = 1 if ( $is_marginal == 0 && $line_ending_fat_comma );
4122
4123         # Turn off the "marginal match" flag in some cases...
4124         # A "marginal match" occurs when the alignment tokens agree
4125         # but there are differences in the other tokens (patterns).
4126         # If we leave the marginal match flag set, then the rule is that we
4127         # will align only if there are more than two lines in the group.
4128         # We will turn of the flag if we almost have a match
4129         # and either we have seen a good alignment token or we
4130         # just need a small pad (2 spaces) to fit.  These rules are
4131         # the result of experimentation.  Tokens which misaligned by just
4132         # one or two characters are annoying.  On the other hand,
4133         # large gaps to less important alignment tokens are also annoying.
4134         if ( $is_marginal == 1
4135             && ( $saw_good_alignment || $max_pad < 3 ) )
4136         {
4137             $is_marginal = 0;
4138         }
4139
4140         # We will use the line endings to help decide on alignments...
4141         # See if the lines end with semicolons...
4142         my $sc_term0;
4143         my $sc_term1;
4144         if ( $jmax_0 < 1 || $jmax_1 < 1 ) {
4145
4146             # shouldn't happen
4147         }
4148         else {
4149             my $pat0 = $rpatterns_0->[ $jmax_0 - 1 ];
4150             my $pat1 = $rpatterns_1->[ $jmax_1 - 1 ];
4151             $sc_term0 = $pat0 =~ /;b?$/;
4152             $sc_term1 = $pat1 =~ /;b?$/;
4153         }
4154
4155         if ( !$is_marginal && !$sc_term0 ) {
4156
4157             # First line of assignment should be semicolon terminated.
4158             # For example, do not align here:
4159             #  $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
4160             #    $$href{-NUM_DIRS} = 0;
4161             if ( $is_assignment{$raw_tokb} ) {
4162                 $is_marginal = 1;
4163             }
4164         }
4165
4166         # Try to avoid some undesirable alignments of opening tokens
4167         # for example, the space between grep and { here:
4168         #  return map { ( $_ => $_ ) }
4169         #    grep     { /$handles/ } $self->_get_delegate_method_list;
4170         $is_marginal ||=
4171              ( $raw_tokb eq '(' || $raw_tokb eq '{' )
4172           && $jmax_1 == 2
4173           && $sc_term0 ne $sc_term1;
4174
4175         ########################################
4176         # return unless this is a marginal match
4177         ########################################
4178         goto RETURN if ( !$is_marginal );
4179
4180         # Undo the marginal match flag in certain cases,
4181
4182         # Two lines with a leading equals-like operator are allowed to
4183         # align if the patterns to the left of the equals are the same.
4184         # For example the following two lines are a marginal match but have
4185         # the same left side patterns, so we will align the equals.
4186         #     my $orig = my $format = "^<<<<< ~~\n";
4187         #     my $abc  = "abc";
4188         # But these have a different left pattern so they will not be
4189         # aligned
4190         #     $xmldoc .= $`;
4191         #     $self->{'leftovers'} .= "<bx-seq:seq" . $';
4192
4193         # First line semicolon terminated but second not, usually ok:
4194         #               my $want = "'ab', 'a', 'b'";
4195         #               my $got  = join( ", ",
4196         #                    map { defined($_) ? "'$_'" : "undef" }
4197         #                          @got );
4198         #  First line not semicolon terminated, Not OK to match:
4199         #   $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
4200         #      $$href{-NUM_DIRS} = 0;
4201         my $pat0 = $rpatterns_0->[0];
4202         my $pat1 = $rpatterns_1->[0];
4203
4204         ##########################################################
4205         # Turn off the marginal flag for some types of assignments
4206         ##########################################################
4207         if ( $is_assignment{$raw_tokb} ) {
4208
4209             # undo marginal flag if first line is semicolon terminated
4210             # and leading patters match
4211             if ($sc_term0) {    # && $sc_term1) {
4212                 $is_marginal = $pat0 ne $pat1;
4213             }
4214         }
4215         elsif ( $raw_tokb eq '=>' ) {
4216
4217             # undo marginal flag if patterns match
4218             $is_marginal = $pat0 ne $pat1 || $line_ending_fat_comma;
4219         }
4220         elsif ( $raw_tokb eq '=~' ) {
4221
4222             # undo marginal flag if both lines are semicolon terminated
4223             # and leading patters match
4224             if ( $sc_term1 && $sc_term0 ) {
4225                 $is_marginal = $pat0 ne $pat1;
4226             }
4227         }
4228
4229         ######################################################
4230         # Turn off the marginal flag if we saw an 'if' or 'or'
4231         ######################################################
4232
4233         # A trailing 'if' and 'or' often gives a good alignment
4234         # For example, we can align these:
4235         #  return -1     if $_[0] =~ m/^CHAPT|APPENDIX/;
4236         #  return $1 + 0 if $_[0] =~ m/^SECT(\d*)$/;
4237
4238         # or
4239         #  $d_in_m[2] = 29          if ( &Date_LeapYear($y) );
4240         #  $d         = $d_in_m[$m] if ( $d > $d_in_m[$m] );
4241
4242         if ($saw_if_or) {
4243
4244             # undo marginal flag if both lines are semicolon terminated
4245             if ( $sc_term0 && $sc_term1 ) {
4246                 $is_marginal = 0;
4247             }
4248         }
4249
4250         # For a marginal match, only keep matches before the first 'bad' match
4251         if (   $is_marginal
4252             && defined($jfirst_bad)
4253             && $imax_align > $jfirst_bad - 1 )
4254         {
4255             $imax_align = $jfirst_bad - 1;
4256         }
4257
4258         ###########################################################
4259         # Allow sweep to match lines with leading '=' in some cases
4260         ###########################################################
4261         if ( $imax_align < 0 && defined($j0_eq_pad) ) {
4262
4263             if (
4264
4265                 # If there is a following line with leading equals, or
4266                 # preceding line with leading equals, then let the sweep align
4267                 # them without restriction.  For example, the first two lines
4268                 # here are a marginal match, but they are followed by a line
4269                 # with leading equals, so the sweep-lr logic can align all of
4270                 # the lines:
4271
4272                 #  $date[1] = $month_to_num{ $date[1] };            # <--line_0
4273                 #  @xdate   = split( /[:\/\s]/, $log->field('t') ); # <--line_1
4274                 #  $day     = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
4275                 #  $time    = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
4276
4277                 # Likewise, if we reverse the two pairs we want the same result
4278
4279                 #  $day     = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
4280                 #  $time    = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
4281                 #  $date[1] = $month_to_num{ $date[1] };            # <--line_0
4282                 #  @xdate   = split( /[:\/\s]/, $log->field('t') ); # <--line_1
4283
4284                 (
4285                        $imax_next >= 0
4286                     || $imax_prev >= 0
4287                     || TEST_MARGINAL_EQ_ALIGNMENT
4288                 )
4289                 && $j0_eq_pad >= -$j0_max_pad
4290                 && $j0_eq_pad <= $j0_max_pad
4291               )
4292             {
4293
4294                 # But do not do this if there is a comma before the '='.
4295                 # For example, the first two lines below have commas and
4296                 # therefore are not allowed to align with lines 3 & 4:
4297
4298                 # my ( $x, $y ) = $self->Size();                      #<--line_0
4299                 # my ( $left, $top, $right, $bottom ) = $self->Window(); #<--l_1
4300                 # my $vx = $right - $left;
4301                 # my $vy = $bottom - $top;
4302
4303                 if ( $rpatterns_0->[0] !~ /,/ && $rpatterns_1->[0] !~ /,/ ) {
4304                     $imax_align = 0;
4305                 }
4306             }
4307         }
4308
4309       RETURN:
4310         return ( $is_marginal, $imax_align );
4311     }
4312 }
4313
4314 sub get_extra_leading_spaces {
4315
4316     my ( $rlines, $rgroups ) = @_;
4317
4318     #----------------------------------------------------------
4319     # Define any extra indentation space (for the -lp option).
4320     # Here is why:
4321     # If a list has side comments, sub scan_list must dump the
4322     # list before it sees everything.  When this happens, it sets
4323     # the indentation to the standard scheme, but notes how
4324     # many spaces it would have liked to use.  We may be able
4325     # to recover that space here in the event that all of the
4326     # lines of a list are back together again.
4327     #----------------------------------------------------------
4328
4329     return 0 unless ( @{$rlines} && @{$rgroups} );
4330
4331     my $object = $rlines->[0]->get_indentation();
4332     return 0 unless ( ref($object) );
4333     my $extra_leading_spaces            = 0;
4334     my $extra_indentation_spaces_wanted = get_recoverable_spaces($object);
4335     return ($extra_leading_spaces) unless ($extra_indentation_spaces_wanted);
4336
4337     my $min_spaces = $extra_indentation_spaces_wanted;
4338     if ( $min_spaces > 0 ) { $min_spaces = 0 }
4339
4340     # loop over all groups
4341     my $ng      = -1;
4342     my $ngroups = @{$rgroups};
4343     foreach my $item ( @{$rgroups} ) {
4344         $ng++;
4345         my ( $jbeg, $jend ) = @{$item};
4346         foreach my $j ( $jbeg .. $jend ) {
4347             next if ( $j == 0 );
4348
4349             # all indentation objects must be the same
4350             if ( $object != $rlines->[$j]->get_indentation() ) {
4351                 return 0;
4352             }
4353         }
4354
4355        # find the maximum space without exceeding the line length for this group
4356         my $avail = $rlines->[$jbeg]->get_available_space_on_right();
4357         my $spaces =
4358           ( $avail > $extra_indentation_spaces_wanted )
4359           ? $extra_indentation_spaces_wanted
4360           : $avail;
4361
4362         #########################################################
4363         # Note: min spaces can be negative; for example with -gnu
4364         # f(
4365         #   do { 1; !!(my $x = bless []); }
4366         #  );
4367         #########################################################
4368         # The following rule is needed to match older formatting:
4369         # For multiple groups, we will keep spaces non-negative.
4370         # For a single group, we will allow a negative space.
4371         if ( $ngroups > 1 && $spaces < 0 ) { $spaces = 0 }
4372
4373         # update the minimum spacing
4374         if ( $ng == 0 || $spaces < $extra_leading_spaces ) {
4375             $extra_leading_spaces = $spaces;
4376         }
4377     }
4378
4379     # update the indentation object because with -icp the terminal
4380     # ');' will use the same adjustment.
4381     $object->permanently_decrease_available_spaces( -$extra_leading_spaces );
4382     return $extra_leading_spaces;
4383 }
4384
4385 sub forget_side_comment {
4386     my ($self) = @_;
4387     $self->[_last_side_comment_column_] = 0;
4388     return;
4389 }
4390
4391 sub is_good_side_comment_column {
4392     my ( $self, $line, $line_number, $level, $num5 ) = @_;
4393
4394     # Upon encountering the first side comment of a group, decide if
4395     # a previous side comment should be forgotten.  This involves
4396     # checking several rules.
4397
4398     # Return true to keep old comment location
4399     # Return false to forget old comment location
4400
4401     my $rfields                 = $line->get_rfields();
4402     my $is_hanging_side_comment = $line->get_is_hanging_side_comment();
4403
4404     # RULE1: Never forget comment before a hanging side comment
4405     goto KEEP if ($is_hanging_side_comment);
4406
4407     # RULE2: Forget a side comment after a short line difference,
4408     # where 'short line difference' is computed from a formula.
4409     # Using a smooth formula helps minimize sudden large changes.
4410     my $line_diff = $line_number - $self->[_last_side_comment_line_number_];
4411     my $alev_diff = abs( $level - $self->[_last_side_comment_level_] );
4412
4413     # '$num5' is the number of comments in the first 5 lines after the first
4414     # comment.  It is needed to keep a compact group of side comments from
4415     # being influenced by a more distant side comment.
4416     $num5 = 1 unless ($num5);
4417
4418     # Some values:
4419
4420     #        $adiff  $num5   $short_diff
4421     #        0       *       12
4422     #        1       1       6
4423     #        1       2       4
4424     #        1       3       3
4425     #        1       4       2
4426     #        2       1       4
4427     #        2       2       2
4428     #        2       3       1
4429     #        3       1       3
4430     #        3       2       1
4431
4432     my $short_diff = SC_LONG_LINE_DIFF / ( 1 + $alev_diff * $num5 );
4433
4434     goto FORGET
4435       if ( $line_diff > $short_diff
4436         || !$self->[_rOpts_valign_side_comments_] );
4437
4438     # RULE3: Forget a side comment if this line is at lower level and
4439     # ends a block
4440     my $last_sc_level = $self->[_last_side_comment_level_];
4441     goto FORGET
4442       if ( $level < $last_sc_level
4443         && $is_closing_block_type{ substr( $rfields->[0], 0, 1 ) } );
4444
4445     # RULE 4: Forget the last side comment if this comment might join a cached
4446     # line ...
4447     if ( my $cached_line_type = get_cached_line_type() ) {
4448
4449         # ... otherwise side comment alignment will get messed up.
4450         # For example, in the following test script
4451         # with using 'perltidy -sct -act=2', the last comment would try to
4452         # align with the previous and then be in the wrong column when
4453         # the lines are combined:
4454
4455         # foreach $line (
4456         #    [0, 1, 2], [3, 4, 5], [6, 7, 8],    # rows
4457         #    [0, 3, 6], [1, 4, 7], [2, 5, 8],    # columns
4458         #    [0, 4, 8], [2, 4, 6]
4459         #  )                                     # diagonals
4460         goto FORGET
4461           if ( $cached_line_type == 2 || $cached_line_type == 4 );
4462     }
4463
4464     # Otherwise, keep it alive
4465     goto KEEP;
4466
4467   FORGET:
4468     return 0;
4469
4470   KEEP:
4471     return 1;
4472 }
4473
4474 sub align_side_comments {
4475
4476     my ( $self, $rlines, $rgroups ) = @_;
4477
4478     # Align any side comments in this batch of lines
4479
4480     # Given:
4481     #  $rlines  - the lines
4482     #  $rgroups - the partition of the lines into groups
4483     #
4484     # We will be working group-by-group because all side comments
4485     # (real or fake) in each group are already aligned. So we just have
4486     # to make alignments between groups wherever possible.
4487
4488     # An unusual aspect is that within each group we have aligned both real
4489     # and fake side comments.  This has the consequence that the lengths of
4490     # long lines without real side comments can cause 'push' all side comments
4491     # to the right.  This seems unusual, but testing with and without this
4492     # feature shows that it is usually better this way.  Othewise, side
4493     # comments can be hidden between long lines without side comments and
4494     # thus be harder to read.
4495
4496     my $group_level        = $self->[_group_level_];
4497     my $continuing_sc_flow = $self->[_last_side_comment_length_] > 0
4498       && $group_level == $self->[_last_level_written_];
4499
4500     # Find groups with side comments, and remember the first nonblank comment
4501     my $j_sc_beg;
4502     my @todo;
4503     my $ng = -1;
4504     foreach my $item ( @{$rgroups} ) {
4505         $ng++;
4506         my ( $jbeg, $jend ) = @{$item};
4507         foreach my $j ( $jbeg .. $jend ) {
4508             my $line = $rlines->[$j];
4509             my $jmax = $line->get_jmax();
4510             if ( $line->get_rfield_lengths()->[$jmax] ) {
4511
4512                 # this group has a line with a side comment
4513                 push @todo, $ng;
4514                 if ( !defined($j_sc_beg) ) {
4515                     $j_sc_beg = $j;
4516                 }
4517                 last;
4518             }
4519         }
4520     }
4521
4522     # done if no groups with side comments
4523     return unless @todo;
4524
4525     # Count $num5 = number of comments in the 5 lines after the first comment
4526     # This is an important factor in a decision formula
4527     my $num5 = 1;
4528     for ( my $jj = $j_sc_beg + 1 ; $jj < @{$rlines} ; $jj++ ) {
4529         my $ldiff = $jj - $j_sc_beg;
4530         last if ( $ldiff > 5 );
4531         my $line   = $rlines->[$jj];
4532         my $jmax   = $line->get_jmax();
4533         my $sc_len = $line->get_rfield_lengths()->[$jmax];
4534         next unless ($sc_len);
4535         $num5++;
4536     }
4537
4538     # Forget the old side comment location if necessary
4539     my $line = $rlines->[$j_sc_beg];
4540     my $lnum =
4541       $j_sc_beg + $self->[_file_writer_object_]->get_output_line_number();
4542     my $keep_it =
4543       $self->is_good_side_comment_column( $line, $lnum, $group_level, $num5 );
4544     my $last_side_comment_column =
4545       $keep_it ? $self->[_last_side_comment_column_] : 0;
4546
4547     # If there are multiple groups we will do two passes
4548     # so that we can find a common alignment for all groups.
4549     my $MAX_PASS = @todo > 1 ? 2 : 1;
4550
4551     # Loop over passes
4552     my $max_comment_column = $last_side_comment_column;
4553     for ( my $PASS = 1 ; $PASS <= $MAX_PASS ; $PASS++ ) {
4554
4555         # If there are two passes, then on the last pass make the old column
4556         # equal to the largest of the group.  This will result in the comments
4557         # being aligned if possible.
4558         if ( $PASS == $MAX_PASS ) {
4559             $last_side_comment_column = $max_comment_column;
4560         }
4561
4562         # Loop over the groups with side comments
4563         my $column_limit;
4564         foreach my $ng (@todo) {
4565             my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
4566
4567             # Note that since all lines in a group have common alignments, we
4568             # just have to work on one of the lines (the first line).
4569             my $line                    = $rlines->[$jbeg];
4570             my $jmax                    = $line->get_jmax();
4571             my $is_hanging_side_comment = $line->get_is_hanging_side_comment();
4572             last
4573               if ( $PASS < $MAX_PASS && $is_hanging_side_comment );
4574
4575             # the maximum space without exceeding the line length:
4576             my $avail = $line->get_available_space_on_right();
4577
4578             # try to use the previous comment column
4579             my $side_comment_column = $line->get_column( $jmax - 1 );
4580             my $move = $last_side_comment_column - $side_comment_column;
4581
4582             # Remember the maximum possible column of the first line with
4583             # side comment
4584             if ( !defined($column_limit) ) {
4585                 $column_limit = $side_comment_column + $avail;
4586             }
4587
4588             next if ( $jmax <= 0 );
4589
4590             # but if this doesn't work, give up and use the minimum space
4591             my $min_move = $self->[_rOpts_minimum_space_to_comment_] - 1;
4592             if ( $move > $avail ) {
4593                 $move = $min_move;
4594             }
4595
4596             # but we want some minimum space to the comment
4597             if (   $move >= 0
4598                 && $j_sc_beg == 0
4599                 && $continuing_sc_flow )
4600             {
4601                 $min_move = 0;
4602             }
4603
4604             # remove constraints on hanging side comments
4605             if ($is_hanging_side_comment) { $min_move = 0 }
4606
4607             if ( $move < $min_move ) {
4608                 $move = $min_move;
4609             }
4610
4611             # don't exceed the available space
4612             if ( $move > $avail ) { $move = $avail }
4613
4614             # We can only increase space, never decrease.
4615             if ( $move < 0 ) { $move = 0 }
4616
4617             # Discover the largest column on the preliminary  pass
4618             if ( $PASS < $MAX_PASS ) {
4619                 my $col = $line->get_column( $jmax - 1 ) + $move;
4620
4621                 # but ignore columns too large for the starting line
4622                 if ( $col > $max_comment_column && $col < $column_limit ) {
4623                     $max_comment_column = $col;
4624                 }
4625             }
4626
4627             # Make the changes on the final pass
4628             else {
4629                 $line->increase_field_width( $jmax - 1, $move );
4630
4631                 # remember this column for the next group
4632                 $last_side_comment_column = $line->get_column( $jmax - 1 );
4633             }
4634         } ## end loop over groups
4635     } ## end loop over passes
4636
4637     # Find the last side comment
4638     my $j_sc_last;
4639     my $ng_last = $todo[-1];
4640     my ( $jbeg, $jend ) = @{ $rgroups->[$ng_last] };
4641     for ( my $jj = $jend ; $jj >= $jbeg ; $jj-- ) {
4642         my $line = $rlines->[$jj];
4643         my $jmax = $line->get_jmax();
4644         if ( $line->get_rfield_lengths()->[$jmax] ) {
4645             $j_sc_last = $jj;
4646             last;
4647         }
4648     }
4649
4650     # Save final side comment info for possible use by the next batch
4651     if ( defined($j_sc_last) ) {
4652         my $line_number =
4653           $self->[_file_writer_object_]->get_output_line_number() + $j_sc_last;
4654         $self->[_last_side_comment_column_]      = $last_side_comment_column;
4655         $self->[_last_side_comment_line_number_] = $line_number;
4656         $self->[_last_side_comment_level_]       = $group_level;
4657     }
4658     return;
4659 }
4660
4661 ###############################
4662 # CODE SECTION 6: Output Step A
4663 ###############################
4664
4665 sub valign_output_step_A {
4666
4667     ###############################################################
4668     # This is Step A in writing vertically aligned lines.
4669     # The line is prepared according to the alignments which have
4670     # been found. Then it is shipped to the next step.
4671     ###############################################################
4672
4673     my ( $self, $rinput_hash ) = @_;
4674
4675     my $line                 = $rinput_hash->{line};
4676     my $min_ci_gap           = $rinput_hash->{min_ci_gap};
4677     my $do_not_align         = $rinput_hash->{do_not_align};
4678     my $group_leader_length  = $rinput_hash->{group_leader_length};
4679     my $extra_leading_spaces = $rinput_hash->{extra_leading_spaces};
4680     my $level                = $rinput_hash->{level};
4681     my $maximum_line_length  = $rinput_hash->{maximum_line_length};
4682
4683     my $rfields                   = $line->get_rfields();
4684     my $rfield_lengths            = $line->get_rfield_lengths();
4685     my $leading_space_count       = $line->get_leading_space_count();
4686     my $outdent_long_lines        = $line->get_outdent_long_lines();
4687     my $maximum_field_index       = $line->get_jmax();
4688     my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
4689     my $Kend                      = $line->get_Kend();
4690     my $level_end                 = $line->get_level_end();
4691
4692     # add any extra spaces
4693     if ( $leading_space_count > $group_leader_length ) {
4694         $leading_space_count += $min_ci_gap;
4695     }
4696
4697     my $str     = $rfields->[0];
4698     my $str_len = $rfield_lengths->[0];
4699
4700     # loop to concatenate all fields of this line and needed padding
4701     my $total_pad_count = 0;
4702     for my $j ( 1 .. $maximum_field_index ) {
4703
4704         # skip zero-length side comments
4705         last
4706           if (
4707             ( $j == $maximum_field_index )
4708             && ( !defined( $rfields->[$j] )
4709                 || ( $rfield_lengths->[$j] == 0 ) )
4710           );
4711
4712         # compute spaces of padding before this field
4713         my $col = $line->get_column( $j - 1 );
4714         my $pad = $col - ( $str_len + $leading_space_count );
4715
4716         if ($do_not_align) {
4717             $pad =
4718               ( $j < $maximum_field_index )
4719               ? 0
4720               : $self->[_rOpts_minimum_space_to_comment_] - 1;
4721         }
4722
4723         # if the -fpsc flag is set, move the side comment to the selected
4724         # column if and only if it is possible, ignoring constraints on
4725         # line length and minimum space to comment
4726         if (   $self->[_rOpts_fixed_position_side_comment_]
4727             && $j == $maximum_field_index )
4728         {
4729             my $newpad =
4730               $pad + $self->[_rOpts_fixed_position_side_comment_] - $col - 1;
4731             if ( $newpad >= 0 ) { $pad = $newpad; }
4732         }
4733
4734         # accumulate the padding
4735         if ( $pad > 0 ) { $total_pad_count += $pad; }
4736
4737         # only add padding when we have a finite field;
4738         # this avoids extra terminal spaces if we have empty fields
4739         if ( $rfield_lengths->[$j] > 0 ) {
4740             $str .= ' ' x $total_pad_count;
4741             $str_len += $total_pad_count;
4742             $total_pad_count = 0;
4743             $str .= $rfields->[$j];
4744             $str_len += $rfield_lengths->[$j];
4745         }
4746         else {
4747             $total_pad_count = 0;
4748         }
4749     }
4750
4751     my $side_comment_length = $rfield_lengths->[$maximum_field_index];
4752
4753     # ship this line off
4754     $self->valign_output_step_B(
4755         {
4756             leading_space_count => $leading_space_count + $extra_leading_spaces,
4757             line                => $str,
4758             line_length         => $str_len,
4759             side_comment_length => $side_comment_length,
4760             outdent_long_lines  => $outdent_long_lines,
4761             rvertical_tightness_flags => $rvertical_tightness_flags,
4762             level                     => $level,
4763             level_end                 => $level_end,
4764             Kend                      => $Kend,
4765             maximum_line_length       => $maximum_line_length,
4766         }
4767     );
4768     return;
4769 }
4770
4771 sub combine_fields {
4772
4773     # We have a group of two lines for which we do not want to align tokens
4774     # between index $imax_align and the side comment.  So we will delete fields
4775     # between $imax_align and the side comment.  Alignments have already
4776     # been set so we have to adjust them.
4777
4778     my ( $line_0, $line_1, $imax_align ) = @_;
4779
4780     if ( !defined($imax_align) ) { $imax_align = -1 }
4781
4782     # First delete the unwanted tokens
4783     my $jmax_old       = $line_0->get_jmax();
4784     my @old_alignments = $line_0->get_alignments();
4785     my @idel           = ( $imax_align + 1 .. $jmax_old - 2 );
4786
4787     return unless (@idel);
4788
4789     foreach my $line ( $line_0, $line_1 ) {
4790         delete_selected_tokens( $line, \@idel );
4791     }
4792
4793     # Now adjust the alignments.  Note that the side comment alignment
4794     # is always at jmax-1, and there is an ending alignment at jmax.
4795     my @new_alignments;
4796     if ( $imax_align >= 0 ) {
4797         @new_alignments[ 0 .. $imax_align ] =
4798           @old_alignments[ 0 .. $imax_align ];
4799     }
4800
4801     my $jmax_new = $line_0->get_jmax();
4802
4803     $new_alignments[ $jmax_new - 1 ] = $old_alignments[ $jmax_old - 1 ];
4804     $new_alignments[$jmax_new] = $old_alignments[$jmax_old];
4805     $line_0->set_alignments(@new_alignments);
4806     $line_1->set_alignments(@new_alignments);
4807     return;
4808 }
4809
4810 sub get_output_line_number {
4811
4812     # The output line number reported to a caller =
4813     # the number of items still in the buffer +
4814     # the number of items written.
4815     return $_[0]->group_line_count() +
4816       $_[0]->[_file_writer_object_]->get_output_line_number();
4817 }
4818
4819 ###############################
4820 # CODE SECTION 7: Output Step B
4821 ###############################
4822
4823 {    ## closure for sub valign_output_step_B
4824
4825     # These are values for a cache used by valign_output_step_B.
4826     my $cached_line_text;
4827     my $cached_line_text_length;
4828     my $cached_line_type;
4829     my $cached_line_opening_flag;
4830     my $cached_line_closing_flag;
4831     my $cached_seqno;
4832     my $cached_line_valid;
4833     my $cached_line_leading_space_count;
4834     my $cached_seqno_string;
4835     my $cached_line_Kend;
4836     my $cached_line_maximum_length;
4837     my $seqno_string;
4838     my $last_nonblank_seqno_string;
4839
4840     sub get_seqno_string {
4841         return $seqno_string;
4842     }
4843
4844     sub get_last_nonblank_seqno_string {
4845         return $last_nonblank_seqno_string;
4846     }
4847
4848     sub set_last_nonblank_seqno_string {
4849         my ($val) = @_;
4850         $last_nonblank_seqno_string = $val;
4851         return;
4852     }
4853
4854     sub get_cached_line_opening_flag {
4855         return $cached_line_opening_flag;
4856     }
4857
4858     sub get_cached_line_type {
4859         return $cached_line_type;
4860     }
4861
4862     sub set_cached_line_valid {
4863         my ($val) = @_;
4864         $cached_line_valid = $val;
4865         return;
4866     }
4867
4868     sub get_cached_seqno {
4869         return $cached_seqno;
4870     }
4871
4872     sub initialize_step_B_cache {
4873
4874         # valign_output_step_B cache:
4875         $cached_line_text                = "";
4876         $cached_line_text_length         = 0;
4877         $cached_line_type                = 0;
4878         $cached_line_opening_flag        = 0;
4879         $cached_line_closing_flag        = 0;
4880         $cached_seqno                    = 0;
4881         $cached_line_valid               = 0;
4882         $cached_line_leading_space_count = 0;
4883         $cached_seqno_string             = "";
4884         $cached_line_Kend                = undef;
4885         $cached_line_maximum_length      = undef;
4886
4887         # These vars hold a string of sequence numbers joined together used by
4888         # the cache
4889         $seqno_string               = "";
4890         $last_nonblank_seqno_string = "";
4891         return;
4892     }
4893
4894     sub _flush_cache {
4895         my ($self) = @_;
4896         if ($cached_line_type) {
4897             $seqno_string = $cached_seqno_string;
4898             $self->valign_output_step_C(
4899                 $cached_line_text,
4900                 $cached_line_leading_space_count,
4901                 $self->[_last_level_written_],
4902                 $cached_line_Kend,
4903             );
4904             $cached_line_type           = 0;
4905             $cached_line_text           = "";
4906             $cached_line_text_length    = 0;
4907             $cached_seqno_string        = "";
4908             $cached_line_Kend           = undef;
4909             $cached_line_maximum_length = undef;
4910         }
4911         return;
4912     }
4913
4914     sub valign_output_step_B {
4915
4916         ###############################################################
4917         # This is Step B in writing vertically aligned lines.
4918         # Vertical tightness is applied according to preset flags.
4919         # In particular this routine handles stacking of opening
4920         # and closing tokens.
4921         ###############################################################
4922
4923         my ( $self, $rinput ) = @_;
4924
4925         my $leading_space_count       = $rinput->{leading_space_count};
4926         my $str                       = $rinput->{line};
4927         my $str_length                = $rinput->{line_length};
4928         my $side_comment_length       = $rinput->{side_comment_length};
4929         my $outdent_long_lines        = $rinput->{outdent_long_lines};
4930         my $rvertical_tightness_flags = $rinput->{rvertical_tightness_flags};
4931         my $level                     = $rinput->{level};
4932         my $level_end                 = $rinput->{level_end};
4933         my $Kend                      = $rinput->{Kend};
4934         my $maximum_line_length       = $rinput->{maximum_line_length};
4935
4936         my $last_level_written = $self->[_last_level_written_];
4937
4938         # Useful -gcs test cases for wide characters are
4939         # perl527/(method.t.2, reg_mesg.t, mime-header.t)
4940
4941         # handle outdenting of long lines:
4942         my $is_outdented_line;
4943         if ($outdent_long_lines) {
4944             my $excess =
4945               $str_length -
4946               $side_comment_length +
4947               $leading_space_count -
4948               $maximum_line_length;
4949             if ( $excess > 0 ) {
4950                 $leading_space_count = 0;
4951                 my $file_writer_object = $self->[_file_writer_object_];
4952                 my $last_outdented_line_at =
4953                   $file_writer_object->get_output_line_number();
4954                 $self->[_last_outdented_line_at_] = $last_outdented_line_at;
4955
4956                 my $outdented_line_count = $self->[_outdented_line_count_];
4957                 unless ($outdented_line_count) {
4958                     $self->[_first_outdented_line_at_] =
4959                       $last_outdented_line_at;
4960                 }
4961                 $outdented_line_count++;
4962                 $self->[_outdented_line_count_] = $outdented_line_count;
4963                 $is_outdented_line = 1;
4964             }
4965         }
4966
4967         # Make preliminary leading whitespace.  It could get changed
4968         # later by entabbing, so we have to keep track of any changes
4969         # to the leading_space_count from here on.
4970         my $leading_string =
4971           $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
4972         my $leading_string_length = length($leading_string);
4973
4974         # Unpack any recombination data; it was packed by
4975         # sub 'Formatter::set_vertical_tightness_flags'
4976
4977         # old   hash              Meaning
4978         # index key
4979         #
4980         # 0   _vt_type:           1=opening non-block    2=closing non-block
4981         #                         3=opening block brace  4=closing block brace
4982         #
4983         # 1a  _vt_opening_flag:  1=no multiple steps, 2=multiple steps ok
4984         # 1b  _vt_closing_flag:    spaces of padding to use if closing
4985         # 2   _vt_seqno:          sequence number of container
4986         # 3   _vt_valid flag:     do not append if this flag is false. Will be
4987         #           true if appropriate -vt flag is set.  Otherwise, Will be
4988         #           made true only for 2 line container in parens with -lp
4989         # 4   _vt_seqno_beg:      sequence number of first token of line
4990         # 5   _vt_seqno_end:      sequence number of last token of line
4991         # 6   _vt_min_lines:      min number of lines for joining opening cache,
4992         #                           0=no constraint
4993         # 7   _vt_max_lines:      max number of lines for joining opening cache,
4994         #                           0=no constraint
4995
4996         my ( $open_or_close, $opening_flag, $closing_flag, $seqno, $valid,
4997             $seqno_beg, $seqno_end );
4998         if ($rvertical_tightness_flags) {
4999
5000             $open_or_close = $rvertical_tightness_flags->{_vt_type};
5001             $opening_flag  = $rvertical_tightness_flags->{_vt_opening_flag};
5002             $closing_flag  = $rvertical_tightness_flags->{_vt_closing_flag};
5003             $seqno         = $rvertical_tightness_flags->{_vt_seqno};
5004             $valid         = $rvertical_tightness_flags->{_vt_valid_flag};
5005             $seqno_beg     = $rvertical_tightness_flags->{_vt_seqno_beg};
5006             $seqno_end     = $rvertical_tightness_flags->{_vt_seqno_end};
5007         }
5008
5009         $seqno_string = $seqno_end;
5010
5011         # handle any cached line ..
5012         # either append this line to it or write it out
5013         # Note: the function length() is used in this next test out of caution.
5014         # All testing has shown that the variable $cached_line_text_length is
5015         # correct, but its calculation is complex and a loss of cached text
5016         # would be a disaster.
5017         if ( length($cached_line_text) ) {
5018
5019             # Dump an invalid cached line
5020             if ( !$cached_line_valid ) {
5021                 $self->valign_output_step_C(
5022                     $cached_line_text,   $cached_line_leading_space_count,
5023                     $last_level_written, $cached_line_Kend
5024                 );
5025             }
5026
5027             # Handle cached line ending in OPENING tokens
5028             elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
5029
5030                 my $gap = $leading_space_count - $cached_line_text_length;
5031
5032                 # handle option of just one tight opening per line:
5033                 if ( $cached_line_opening_flag == 1 ) {
5034                     if ( defined($open_or_close) && $open_or_close == 1 ) {
5035                         $gap = -1;
5036                     }
5037                 }
5038
5039                 # Do not join the lines if this might produce a one-line
5040                 # container which exceeds the maximum line length.  This is
5041                 # necessary prevent blinking, particularly with the combination
5042                 # -xci -pvt=2.  In that case a one-line block alternately forms
5043                 # and breaks, causing -xci to alternately turn on and off (case
5044                 # b765).
5045                 # Patched to fix cases b656 b862 b971 b972: always do the check
5046                 # if the maximum line length changes (due to -vmll).
5047                 if (
5048                     $gap >= 0
5049                     && ( $maximum_line_length != $cached_line_maximum_length
5050                         || ( defined($level_end) && $level > $level_end ) )
5051                   )
5052                 {
5053                     my $test_line_length =
5054                       $cached_line_text_length + $gap + $str_length;
5055
5056                     # Add a small tolerance in the length test (fixes case b862)
5057                     if ( $test_line_length > $cached_line_maximum_length - 2 ) {
5058                         $gap = -1;
5059                     }
5060                 }
5061
5062                 if ( $gap >= 0 && defined($seqno_beg) ) {
5063                     $maximum_line_length   = $cached_line_maximum_length;
5064                     $leading_string        = $cached_line_text . ' ' x $gap;
5065                     $leading_string_length = $cached_line_text_length + $gap;
5066                     $leading_space_count   = $cached_line_leading_space_count;
5067                     $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
5068                     $level        = $last_level_written;
5069                 }
5070                 else {
5071                     $self->valign_output_step_C(
5072                         $cached_line_text,   $cached_line_leading_space_count,
5073                         $last_level_written, $cached_line_Kend
5074                     );
5075                 }
5076             }
5077
5078             # Handle cached line ending in CLOSING tokens
5079             else {
5080                 my $test_line =
5081                   $cached_line_text . ' ' x $cached_line_closing_flag . $str;
5082                 my $test_line_length =
5083                   $cached_line_text_length +
5084                   $cached_line_closing_flag +
5085                   $str_length;
5086                 if (
5087
5088                     # The new line must start with container
5089                     $seqno_beg
5090
5091                     # The container combination must be okay..
5092                     && (
5093
5094                         # okay to combine like types
5095                         ( $open_or_close == $cached_line_type )
5096
5097                         # closing block brace may append to non-block
5098                         || ( $cached_line_type == 2 && $open_or_close == 4 )
5099
5100                         # something like ');'
5101                         || ( !$open_or_close && $cached_line_type == 2 )
5102
5103                     )
5104
5105                     # The combined line must fit
5106                     && ( $test_line_length <= $cached_line_maximum_length )
5107                   )
5108                 {
5109
5110                     $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
5111
5112                     # Patch to outdent closing tokens ending # in ');' If we
5113                     # are joining a line like ');' to a previous stacked set of
5114                     # closing tokens, then decide if we may outdent the
5115                     # combined stack to the indentation of the ');'.  Since we
5116                     # should not normally outdent any of the other tokens more
5117                     # than the indentation of the lines that contained them, we
5118                     # will only do this if all of the corresponding opening
5119                     # tokens were on the same line.  This can happen with -sot
5120                     # and -sct.
5121
5122                     # For example, it is ok here:
5123                     #   __PACKAGE__->load_components( qw(
5124                     #         PK::Auto
5125                     #         Core
5126                     #   ));
5127                     #
5128                     # But, for example, we do not outdent in this example
5129                     # because that would put the closing sub brace out farther
5130                     # than the opening sub brace:
5131                     #
5132                     #   perltidy -sot -sct
5133                     #   $c->Tk::bind(
5134                     #       '<Control-f>' => sub {
5135                     #           my ($c) = @_;
5136                     #           my $e = $c->XEvent;
5137                     #           itemsUnderArea $c;
5138                     #       } );
5139                     #
5140                     if (   $str =~ /^\);/
5141                         && $cached_line_text =~ /^[\)\}\]\s]*$/ )
5142                     {
5143
5144                         # The way to tell this is if the stacked sequence
5145                         # numbers of this output line are the reverse of the
5146                         # stacked sequence numbers of the previous non-blank
5147                         # line of sequence numbers.  So we can join if the
5148                         # previous nonblank string of tokens is the mirror
5149                         # image.  For example if stack )}] is 13:8:6 then we
5150                         # are looking for a leading stack like [{( which
5151                         # is 6:8:13. We only need to check the two ends,
5152                         # because the intermediate tokens must fall in order.
5153                         # Note on speed: having to split on colons and
5154                         # eliminate multiple colons might appear to be slow,
5155                         # but it's not an issue because we almost never come
5156                         # through here.  In a typical file we don't.
5157
5158                         $seqno_string               =~ s/^:+//;
5159                         $last_nonblank_seqno_string =~ s/^:+//;
5160                         $seqno_string               =~ s/:+/:/g;
5161                         $last_nonblank_seqno_string =~ s/:+/:/g;
5162
5163                         # how many spaces can we outdent?
5164                         my $diff =
5165                           $cached_line_leading_space_count -
5166                           $leading_space_count;
5167                         if (   $diff > 0
5168                             && length($seqno_string)
5169                             && length($last_nonblank_seqno_string) ==
5170                             length($seqno_string) )
5171                         {
5172                             my @seqno_last =
5173                               ( split /:/, $last_nonblank_seqno_string );
5174                             my @seqno_now = ( split /:/, $seqno_string );
5175                             if (   @seqno_now
5176                                 && @seqno_last
5177                                 && $seqno_now[-1] == $seqno_last[0]
5178                                 && $seqno_now[0] == $seqno_last[-1] )
5179                             {
5180
5181                                 # OK to outdent ..
5182                                 # for absolute safety, be sure we only remove
5183                                 # whitespace
5184                                 my $ws = substr( $test_line, 0, $diff );
5185                                 if ( ( length($ws) == $diff )
5186                                     && $ws =~ /^\s+$/ )
5187                                 {
5188
5189                                     $test_line = substr( $test_line, $diff );
5190                                     $cached_line_leading_space_count -= $diff;
5191                                     $last_level_written =
5192                                       $self->level_change(
5193                                         $cached_line_leading_space_count,
5194                                         $diff, $last_level_written );
5195                                     $self->reduce_valign_buffer_indentation(
5196                                         $diff);
5197                                 }
5198
5199                                 # shouldn't happen, but not critical:
5200                                 ##else {
5201                                 ## ERROR transferring indentation here
5202                                 ##}
5203                             }
5204                         }
5205                     }
5206
5207                     # Change the args to look like we received the combined line
5208                     $str                   = $test_line;
5209                     $str_length            = $test_line_length;
5210                     $leading_string        = "";
5211                     $leading_string_length = 0;
5212                     $leading_space_count   = $cached_line_leading_space_count;
5213                     $level                 = $last_level_written;
5214                     $maximum_line_length   = $cached_line_maximum_length;
5215                 }
5216                 else {
5217                     $self->valign_output_step_C(
5218                         $cached_line_text,   $cached_line_leading_space_count,
5219                         $last_level_written, $cached_line_Kend
5220                     );
5221                 }
5222             }
5223         }
5224         $cached_line_type           = 0;
5225         $cached_line_text           = "";
5226         $cached_line_text_length    = 0;
5227         $cached_line_Kend           = undef;
5228         $cached_line_maximum_length = undef;
5229
5230         # make the line to be written
5231         my $line        = $leading_string . $str;
5232         my $line_length = $leading_string_length + $str_length;
5233
5234         # Safety check: be sure that a line to be cached as a stacked block
5235         # brace line ends in the appropriate opening or closing block brace.
5236         # This should always be the case if the caller set flags correctly.
5237         # Code '3' is for -sobb, code '4' is for -scbb.
5238         if ($open_or_close) {
5239             if (   $open_or_close == 3 && $line !~ /\{\s*$/
5240                 || $open_or_close == 4 && $line !~ /\}\s*$/ )
5241             {
5242                 $open_or_close = 0;
5243             }
5244         }
5245
5246         # write or cache this line ...
5247         # fix for case b999: do not cache an outdented line
5248         if ( !$open_or_close || $side_comment_length > 0 || $is_outdented_line )
5249         {
5250             $self->valign_output_step_C( $line, $leading_space_count, $level,
5251                 $Kend );
5252         }
5253         else {
5254             $cached_line_text                = $line;
5255             $cached_line_text_length         = $line_length;
5256             $cached_line_type                = $open_or_close;
5257             $cached_line_opening_flag        = $opening_flag;
5258             $cached_line_closing_flag        = $closing_flag;
5259             $cached_seqno                    = $seqno;
5260             $cached_line_valid               = $valid;
5261             $cached_line_leading_space_count = $leading_space_count;
5262             $cached_seqno_string             = $seqno_string;
5263             $cached_line_Kend                = $Kend;
5264             $cached_line_maximum_length      = $maximum_line_length;
5265         }
5266
5267         $self->[_last_level_written_]       = $level;
5268         $self->[_last_side_comment_length_] = $side_comment_length;
5269         return;
5270     }
5271 }
5272
5273 ###############################
5274 # CODE SECTION 8: Output Step C
5275 ###############################
5276
5277 {    ## closure for sub valign_output_step_C
5278
5279     # Vertical alignment buffer used by valign_output_step_C
5280     my $valign_buffer_filling;
5281     my @valign_buffer;
5282
5283     sub initialize_valign_buffer {
5284         @valign_buffer         = ();
5285         $valign_buffer_filling = "";
5286         return;
5287     }
5288
5289     sub dump_valign_buffer {
5290         my ($self) = @_;
5291         if (@valign_buffer) {
5292             foreach (@valign_buffer) {
5293                 $self->valign_output_step_D( @{$_} );
5294             }
5295             @valign_buffer = ();
5296         }
5297         $valign_buffer_filling = "";
5298         return;
5299     }
5300
5301     sub reduce_valign_buffer_indentation {
5302
5303         my ( $self, $diff ) = @_;
5304         if ( $valign_buffer_filling && $diff ) {
5305             my $max_valign_buffer = @valign_buffer;
5306             foreach my $i ( 0 .. $max_valign_buffer - 1 ) {
5307                 my ( $line, $leading_space_count, $level, $Kend ) =
5308                   @{ $valign_buffer[$i] };
5309                 my $ws = substr( $line, 0, $diff );
5310                 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
5311                     $line = substr( $line, $diff );
5312                 }
5313                 if ( $leading_space_count >= $diff ) {
5314                     $leading_space_count -= $diff;
5315                     $level =
5316                       $self->level_change( $leading_space_count, $diff,
5317                         $level );
5318                 }
5319                 $valign_buffer[$i] =
5320                   [ $line, $leading_space_count, $level, $Kend ];
5321             }
5322         }
5323         return;
5324     }
5325
5326     sub valign_output_step_C {
5327
5328         ###############################################################
5329         # This is Step C in writing vertically aligned lines.
5330         # Lines are either stored in a buffer or passed along to the next step.
5331         # The reason for storing lines is that we may later want to reduce their
5332         # indentation when -sot and -sct are both used.
5333         ###############################################################
5334         my ( $self, @args ) = @_;
5335
5336         my $seqno_string               = get_seqno_string();
5337         my $last_nonblank_seqno_string = get_last_nonblank_seqno_string();
5338
5339         # Dump any saved lines if we see a line with an unbalanced opening or
5340         # closing token.
5341         $self->dump_valign_buffer()
5342           if ( $seqno_string && $valign_buffer_filling );
5343
5344         # Either store or write this line
5345         if ($valign_buffer_filling) {
5346             push @valign_buffer, [@args];
5347         }
5348         else {
5349             $self->valign_output_step_D(@args);
5350         }
5351
5352         # For lines starting or ending with opening or closing tokens..
5353         if ($seqno_string) {
5354             $last_nonblank_seqno_string = $seqno_string;
5355             set_last_nonblank_seqno_string($seqno_string);
5356
5357             # Start storing lines when we see a line with multiple stacked
5358             # opening tokens.
5359             # patch for RT #94354, requested by Colin Williams
5360             if (   $seqno_string =~ /^\d+(\:+\d+)+$/
5361                 && $args[0] !~ /^[\}\)\]\:\?]/ )
5362             {
5363
5364                 # This test is efficient but a little subtle: The first test
5365                 # says that we have multiple sequence numbers and hence
5366                 # multiple opening or closing tokens in this line.  The second
5367                 # part of the test rejects stacked closing and ternary tokens.
5368                 # So if we get here then we should have stacked unbalanced
5369                 # opening tokens.
5370
5371                 # Here is a complex example:
5372
5373                 # Foo($Bar[0], {  # (side comment)
5374                 #     baz => 1,
5375                 # });
5376
5377                 # The first line has sequence 6::4.  It does not begin with
5378                 # a closing token or ternary, so it passes the test and must be
5379                 # stacked opening tokens.
5380
5381                 # The last line has sequence 4:6 but is a stack of closing
5382                 # tokens, so it gets rejected.
5383
5384                 # Note that the sequence number of an opening token for a qw
5385                 # quote is a negative number and will be rejected.  For
5386                 # example, for the following line: skip_symbols([qw(
5387                 # $seqno_string='10:5:-1'.  It would be okay to accept it but I
5388                 # decided not to do this after testing.
5389
5390                 $valign_buffer_filling = $seqno_string;
5391
5392             }
5393         }
5394         return;
5395     }
5396 }
5397
5398 ###############################
5399 # CODE SECTION 9: Output Step D
5400 ###############################
5401
5402 sub valign_output_step_D {
5403
5404     ###############################################################
5405     # This is Step D in writing vertically aligned lines.
5406     # It is the end of the vertical alignment pipeline.
5407     # Write one vertically aligned line of code to the output object.
5408     ###############################################################
5409
5410     my ( $self, $line, $leading_space_count, $level, $Kend ) = @_;
5411
5412     # The line is currently correct if there is no tabbing (recommended!)
5413     # We may have to lop off some leading spaces and replace with tabs.
5414     if ( $leading_space_count > 0 ) {
5415
5416         my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
5417         my $rOpts_tabs           = $self->[_rOpts_tabs_];
5418         my $rOpts_entab_leading_whitespace =
5419           $self->[_rOpts_entab_leading_whitespace_];
5420
5421         # Nothing to do if no tabs
5422         if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
5423             || $rOpts_indent_columns <= 0 )
5424         {
5425
5426             # nothing to do
5427         }
5428
5429         # Handle entab option
5430         elsif ($rOpts_entab_leading_whitespace) {
5431
5432             # Patch 12-nov-2018 based on report from Glenn. Extra padding was
5433             # not correctly entabbed, nor were side comments: Increase leading
5434             # space count for a padded line to get correct tabbing
5435             if ( $line =~ /^(\s+)(.*)$/ ) {
5436                 my $spaces = length($1);
5437                 if ( $spaces > $leading_space_count ) {
5438                     $leading_space_count = $spaces;
5439                 }
5440             }
5441
5442             my $space_count =
5443               $leading_space_count % $rOpts_entab_leading_whitespace;
5444             my $tab_count =
5445               int( $leading_space_count / $rOpts_entab_leading_whitespace );
5446             my $leading_string = "\t" x $tab_count . ' ' x $space_count;
5447             if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
5448                 substr( $line, 0, $leading_space_count ) = $leading_string;
5449             }
5450             else {
5451
5452                 # shouldn't happen - program error counting whitespace
5453                 # - skip entabbing
5454                 DEBUG_TABS
5455                   && warning(
5456 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
5457                   );
5458             }
5459         }
5460
5461         # Handle option of one tab per level
5462         else {
5463             my $leading_string = ( "\t" x $level );
5464             my $space_count =
5465               $leading_space_count - $level * $rOpts_indent_columns;
5466
5467             # shouldn't happen:
5468             if ( $space_count < 0 ) {
5469
5470                 # But it could be an outdented comment
5471                 if ( $line !~ /^\s*#/ ) {
5472                     DEBUG_TABS
5473                       && warning(
5474 "Error entabbing in valign_output_step_D: for level=$level count=$leading_space_count\n"
5475                       );
5476                 }
5477                 $leading_string = ( ' ' x $leading_space_count );
5478             }
5479             else {
5480                 $leading_string .= ( ' ' x $space_count );
5481             }
5482             if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
5483                 substr( $line, 0, $leading_space_count ) = $leading_string;
5484             }
5485             else {
5486
5487                 # shouldn't happen - program error counting whitespace
5488                 # we'll skip entabbing
5489                 DEBUG_TABS
5490                   && warning(
5491 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
5492                   );
5493             }
5494         }
5495     }
5496     my $file_writer_object = $self->[_file_writer_object_];
5497     $file_writer_object->write_code_line( $line . "\n", $Kend );
5498
5499     return;
5500 }
5501
5502 {    ## closure for sub get_leading_string
5503
5504     my @leading_string_cache;
5505
5506     sub initialize_leading_string_cache {
5507         @leading_string_cache = ();
5508         return;
5509     }
5510
5511     sub get_leading_string {
5512
5513         # define the leading whitespace string for this line..
5514         my ( $self, $leading_whitespace_count, $group_level ) = @_;
5515
5516         # Handle case of zero whitespace, which includes multi-line quotes
5517         # (which may have a finite level; this prevents tab problems)
5518         if ( $leading_whitespace_count <= 0 ) {
5519             return "";
5520         }
5521
5522         # look for previous result
5523         elsif ( $leading_string_cache[$leading_whitespace_count] ) {
5524             return $leading_string_cache[$leading_whitespace_count];
5525         }
5526
5527         # must compute a string for this number of spaces
5528         my $leading_string;
5529
5530         # Handle simple case of no tabs
5531         my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
5532         my $rOpts_tabs           = $self->[_rOpts_tabs_];
5533         my $rOpts_entab_leading_whitespace =
5534           $self->[_rOpts_entab_leading_whitespace_];
5535
5536         if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
5537             || $rOpts_indent_columns <= 0 )
5538         {
5539             $leading_string = ( ' ' x $leading_whitespace_count );
5540         }
5541
5542         # Handle entab option
5543         elsif ($rOpts_entab_leading_whitespace) {
5544             my $space_count =
5545               $leading_whitespace_count % $rOpts_entab_leading_whitespace;
5546             my $tab_count = int(
5547                 $leading_whitespace_count / $rOpts_entab_leading_whitespace );
5548             $leading_string = "\t" x $tab_count . ' ' x $space_count;
5549         }
5550
5551         # Handle option of one tab per level
5552         else {
5553             $leading_string = ( "\t" x $group_level );
5554             my $space_count =
5555               $leading_whitespace_count - $group_level * $rOpts_indent_columns;
5556
5557             # shouldn't happen:
5558             if ( $space_count < 0 ) {
5559                 DEBUG_TABS
5560                   && warning(
5561 "Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n"
5562                   );
5563
5564                 # -- skip entabbing
5565                 $leading_string = ( ' ' x $leading_whitespace_count );
5566             }
5567             else {
5568                 $leading_string .= ( ' ' x $space_count );
5569             }
5570         }
5571         $leading_string_cache[$leading_whitespace_count] = $leading_string;
5572         return $leading_string;
5573     }
5574 } ## end get_leading_string
5575
5576 ##########################
5577 # CODE SECTION 10: Summary
5578 ##########################
5579
5580 sub report_anything_unusual {
5581     my $self = shift;
5582
5583     my $outdented_line_count = $self->[_outdented_line_count_];
5584     if ( $outdented_line_count > 0 ) {
5585         write_logfile_entry(
5586             "$outdented_line_count long lines were outdented:\n");
5587         my $first_outdented_line_at = $self->[_first_outdented_line_at_];
5588         write_logfile_entry(
5589             "  First at output line $first_outdented_line_at\n");
5590
5591         if ( $outdented_line_count > 1 ) {
5592             my $last_outdented_line_at = $self->[_last_outdented_line_at_];
5593             write_logfile_entry(
5594                 "   Last at output line $last_outdented_line_at\n");
5595         }
5596         write_logfile_entry(
5597             "  use -noll to prevent outdenting, -l=n to increase line length\n"
5598         );
5599         write_logfile_entry("\n");
5600     }
5601     return;
5602 }
5603 1;