lib/Perl/Tidy/VerticalAligner.pm

   1 package Perl::Tidy::VerticalAligner;
   2 use strict;
   3 use warnings;
   4 our $VERSION = '20210717';
   5
   6 use Perl::Tidy::VerticalAligner::Alignment;
   7 use Perl::Tidy::VerticalAligner::Line;
   8
   9 # The Perl::Tidy::VerticalAligner package collects output lines and
  10 # attempts to line up certain common tokens, such as => and #, which are
  11 # identified by the calling routine.
  12 #
  13 # Usage:
  14 #   - Initiate an object with a call to new().
  15 #   - Write lines one-by-one with calls to valign_input().
  16 #   - Make a final call to flush() to empty the pipeline.
  17 #
  18 # The sub valign_input collects lines into groups.  When a group reaches
  19 # the maximum possible size it is processed for alignment and output.
  20 # The maximum group size is reached whenerver there is a change in indentation
  21 # level, a blank line, a block comment, or an external flush call.  The calling
  22 # routine may also force a break in alignment at any time.
  23 #
  24 # If the calling routine needs to interrupt the output and send other text to
  25 # the output, it must first call flush() to empty the output pipeline.  This
  26 # might occur for example if a block of pod text needs to be sent to the output
  27 # between blocks of code.
  28
  29 # It is essential that a final call to flush() be made. Otherwise some
  30 # final lines of text will be lost.
  31
  32 # Index...
  33 # CODE SECTION 1: Preliminary code, global definitions and sub new
  34 #                 sub new
  35 # CODE SECTION 2: Some Basic Utilities
  36 # CODE SECTION 3: Code to accept input and form groups
  37 #                 sub valign_input
  38 # CODE SECTION 4: Code to process comment lines
  39 #                 sub _flush_comment_lines
  40 # CODE SECTION 5: Code to process groups of code lines
  41 #                 sub _flush_group_lines
  42 # CODE SECTION 6: Output Step A
  43 #                 sub valign_output_step_A
  44 # CODE SECTION 7: Output Step B
  45 #                 sub valign_output_step_B
  46 # CODE SECTION 8: Output Step C
  47 #                 sub valign_output_step_C
  48 # CODE SECTION 9: Output Step D
  49 #                 sub valign_output_step_D
  50 # CODE SECTION 10: Summary
  51 #                 sub report_anything_unusual
  52
  53 ##################################################################
  54 # CODE SECTION 1: Preliminary code, global definitions and sub new
  55 ##################################################################
  56
  57 sub AUTOLOAD {
  58
  59     # Catch any undefined sub calls so that we are sure to get
  60     # some diagnostic information.  This sub should never be called
  61     # except for a programming error.
  62     our $AUTOLOAD;
  63     return if ( $AUTOLOAD =~ /\bDESTROY$/ );
  64     my ( $pkg, $fname, $lno ) = caller();
  65     my $my_package = __PACKAGE__;
  66     print STDERR <<EOM;
  67 ======================================================================
  68 Error detected in package '$my_package', version $VERSION
  69 Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
  70 Called from package: '$pkg'
  71 Called from File '$fname'  at line '$lno'
  72 This error is probably due to a recent programming change
  73 ======================================================================
  74 EOM
  75     exit 1;
  76 }
  77
  78 sub DESTROY {
  79
  80     # required to avoid call to AUTOLOAD in some versions of perl
  81 }
  82
  83 BEGIN {
  84
  85     # Define the fixed indexes for variables in $self, which is an array
  86     # reference.  Note the convention of leading and trailing underscores to
  87     # keep them unique.
  88     my $i = 0;
  89     use constant {
  90         _file_writer_object_ => $i++,
  91         _logger_object_      => $i++,
  92         _diagnostics_object_ => $i++,
  93         _length_function_    => $i++,
  94
  95         _rOpts_                              => $i++,
  96         _rOpts_indent_columns_               => $i++,
  97         _rOpts_tabs_                         => $i++,
  98         _rOpts_entab_leading_whitespace_     => $i++,
  99         _rOpts_fixed_position_side_comment_  => $i++,
 100         _rOpts_minimum_space_to_comment_     => $i++,
 101         _rOpts_maximum_line_length_          => $i++,
 102         _rOpts_variable_maximum_line_length_ => $i++,
 103         _rOpts_valign_                       => $i++,
 104
 105         _last_level_written_            => $i++,
 106         _last_side_comment_column_      => $i++,
 107         _last_side_comment_line_number_ => $i++,
 108         _last_side_comment_length_      => $i++,
 109         _last_side_comment_level_       => $i++,
 110         _outdented_line_count_          => $i++,
 111         _first_outdented_line_at_       => $i++,
 112         _last_outdented_line_at_        => $i++,
 113         _consecutive_block_comments_    => $i++,
 114
 115         _rgroup_lines_                => $i++,
 116         _group_level_                 => $i++,
 117         _group_type_                  => $i++,
 118         _zero_count_                  => $i++,
 119         _last_leading_space_count_    => $i++,
 120         _comment_leading_space_count_ => $i++,
 121     };
 122
 123     # Debug flag. This is a relic from the original program development
 124     # looking for problems with tab characters.  Caution: this debug flag can
 125     # produce a lot of output It should be 0 except when debugging small
 126     # scripts.
 127
 128     use constant DEBUG_TABS => 0;
 129
 130     my $debug_warning = sub {
 131         print STDOUT "VALIGN_DEBUGGING with key $_[0]\n";
 132         return;
 133     };
 134
 135     DEBUG_TABS && $debug_warning->('TABS');
 136
 137 }
 138
 139 sub new {
 140
 141     my ( $class, @args ) = @_;
 142
 143     my %defaults = (
 144         rOpts              => undef,
 145         file_writer_object => undef,
 146         logger_object      => undef,
 147         diagnostics_object => undef,
 148         length_function    => sub { return length( $_[0] ) },
 149     );
 150     my %args = ( %defaults, @args );
 151
 152     # Initialize other caches and buffers
 153     initialize_step_B_cache();
 154     initialize_valign_buffer();
 155     initialize_leading_string_cache();
 156     initialize_decode();
 157
 158     # Initialize all variables in $self.
 159     # To add an item to $self, first define a new constant index in the BEGIN
 160     # section.
 161     my $self = [];
 162
 163     # objects
 164     $self->[_file_writer_object_] = $args{file_writer_object};
 165     $self->[_logger_object_]      = $args{logger_object};
 166     $self->[_diagnostics_object_] = $args{diagnostics_object};
 167     $self->[_length_function_]    = $args{length_function};
 168
 169     # shortcuts to user options
 170     my $rOpts = $args{rOpts};
 171
 172     $self->[_rOpts_]                = $rOpts;
 173     $self->[_rOpts_indent_columns_] = $rOpts->{'indent-columns'};
 174     $self->[_rOpts_tabs_]           = $rOpts->{'tabs'};
 175     $self->[_rOpts_entab_leading_whitespace_] =
 176       $rOpts->{'entab-leading-whitespace'};
 177     $self->[_rOpts_fixed_position_side_comment_] =
 178       $rOpts->{'fixed-position-side-comment'};
 179     $self->[_rOpts_minimum_space_to_comment_] =
 180       $rOpts->{'minimum-space-to-comment'};
 181     $self->[_rOpts_maximum_line_length_] = $rOpts->{'maximum-line-length'};
 182     $self->[_rOpts_variable_maximum_line_length_] =
 183       $rOpts->{'variable-maximum-line-length'};
 184     $self->[_rOpts_valign_] = $rOpts->{'valign'};
 185
 186     # Batch of lines being collected
 187     $self->[_rgroup_lines_]                = [];
 188     $self->[_group_level_]                 = 0;
 189     $self->[_group_type_]                  = "";
 190     $self->[_zero_count_]                  = 0;
 191     $self->[_comment_leading_space_count_] = 0;
 192     $self->[_last_leading_space_count_]    = 0;
 193
 194     # Memory of what has been processed
 195     $self->[_last_level_written_]            = -1;
 196     $self->[_last_side_comment_column_]      = 0;
 197     $self->[_last_side_comment_line_number_] = 0;
 198     $self->[_last_side_comment_length_]      = 0;
 199     $self->[_last_side_comment_level_]       = -1;
 200     $self->[_outdented_line_count_]          = 0;
 201     $self->[_first_outdented_line_at_]       = 0;
 202     $self->[_last_outdented_line_at_]        = 0;
 203     $self->[_consecutive_block_comments_]    = 0;
 204
 205     bless $self, $class;
 206     return $self;
 207 }
 208
 209 #################################
 210 # CODE SECTION 2: Basic Utilities
 211 #################################
 212
 213 sub flush {
 214
 215     # flush() is the external call to completely empty the pipeline.
 216     my ($self) = @_;
 217
 218     # push things out the pipline...
 219
 220     # push out any current group lines
 221     $self->_flush_group_lines();
 222
 223     # then anything left in the cache of step_B
 224     $self->_flush_cache();
 225
 226     # then anything left in the buffer of step_C
 227     $self->dump_valign_buffer();
 228
 229     return;
 230 }
 231
 232 sub initialize_for_new_group {
 233     my ($self) = @_;
 234
 235     $self->[_rgroup_lines_]                = [];
 236     $self->[_group_type_]                  = "";
 237     $self->[_zero_count_]                  = 0;
 238     $self->[_comment_leading_space_count_] = 0;
 239     $self->[_last_leading_space_count_]    = 0;
 240
 241     # Note that the value for _group_level_ is
 242     # handled separately in sub valign_input
 243     return;
 244 }
 245
 246 sub group_line_count {
 247     return +@{ $_[0]->[_rgroup_lines_] };
 248 }
 249
 250 # interface to Perl::Tidy::Diagnostics routines
 251 # For debugging; not currently used
 252 sub write_diagnostics {
 253     my ( $self, $msg ) = @_;
 254     my $diagnostics_object = $self->[_diagnostics_object_];
 255     if ($diagnostics_object) {
 256         $diagnostics_object->write_diagnostics($msg);
 257     }
 258     return;
 259 }
 260
 261 # interface to Perl::Tidy::Logger routines
 262 sub warning {
 263     my ( $self, $msg ) = @_;
 264     my $logger_object = $self->[_logger_object_];
 265     if ($logger_object) {
 266         $logger_object->warning($msg);
 267     }
 268     return;
 269 }
 270
 271 sub write_logfile_entry {
 272     my ( $self, $msg ) = @_;
 273     my $logger_object = $self->[_logger_object_];
 274     if ($logger_object) {
 275         $logger_object->write_logfile_entry($msg);
 276     }
 277     return;
 278 }
 279
 280 sub report_definite_bug {
 281     my ( $self, $msg ) = @_;
 282     my $logger_object = $self->[_logger_object_];
 283     if ($logger_object) {
 284         $logger_object->report_definite_bug();
 285     }
 286     return;
 287 }
 288
 289 sub get_cached_line_count {
 290     my $self = shift;
 291     return $self->group_line_count() + ( get_cached_line_type() ? 1 : 0 );
 292 }
 293
 294 sub get_spaces {
 295
 296     # return the number of leading spaces associated with an indentation
 297     # variable $indentation is either a constant number of spaces or an
 298     # object with a get_spaces method.
 299     my $indentation = shift;
 300     return ref($indentation) ? $indentation->get_spaces() : $indentation;
 301 }
 302
 303 sub get_recoverable_spaces {
 304
 305     # return the number of spaces (+ means shift right, - means shift left)
 306     # that we would like to shift a group of lines with the same indentation
 307     # to get them to line up with their opening parens
 308     my $indentation = shift;
 309     return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
 310 }
 311
 312 sub maximum_line_length_for_level {
 313
 314     # return maximum line length for line starting with a given level
 315     my ( $self, $level ) = @_;
 316     my $maximum_line_length = $self->[_rOpts_maximum_line_length_];
 317     if ( $self->[_rOpts_variable_maximum_line_length_] ) {
 318         if ( $level < 0 ) { $level = 0 }
 319         $maximum_line_length += $level * $self->[_rOpts_indent_columns_];
 320     }
 321     return $maximum_line_length;
 322 }
 323
 324 ######################################################
 325 # CODE SECTION 3: Code to accept input and form groups
 326 ######################################################
 327
 328 sub push_group_line {
 329
 330     my ( $self, $new_line ) = @_;
 331     my $rgroup_lines = $self->[_rgroup_lines_];
 332     push @{$rgroup_lines}, $new_line;
 333     return;
 334 }
 335
 336 use constant DEBUG_VALIGN      => 0;
 337 use constant SC_LONG_LINE_DIFF => 12;
 338
 339 sub valign_input {
 340
 341     # Place one line in the current vertical group.
 342     #
 343     # The input parameters are:
 344     #     $level = indentation level of this line
 345     #     $rfields = reference to array of fields
 346     #     $rpatterns = reference to array of patterns, one per field
 347     #     $rtokens   = reference to array of tokens starting fields 1,2,..
 348     #
 349     # Here is an example of what this package does.  In this example,
 350     # we are trying to line up both the '=>' and the '#'.
 351     #
 352     #         '18' => 'grave',    #   \`
 353     #         '19' => 'acute',    #   `'
 354     #         '20' => 'caron',    #   \v
 355     # <-tabs-><f1-><--field 2 ---><-f3->
 356     # |            |              |    |
 357     # |            |              |    |
 358     # col1        col2         col3 col4
 359     #
 360     # The calling routine has already broken the entire line into 3 fields as
 361     # indicated.  (So the work of identifying promising common tokens has
 362     # already been done).
 363     #
 364     # In this example, there will be 2 tokens being matched: '=>' and '#'.
 365     # They are the leading parts of fields 2 and 3, but we do need to know
 366     # what they are so that we can dump a group of lines when these tokens
 367     # change.
 368     #
 369     # The fields contain the actual characters of each field.  The patterns
 370     # are like the fields, but they contain mainly token types instead
 371     # of tokens, so they have fewer characters.  They are used to be
 372     # sure we are matching fields of similar type.
 373     #
 374     # In this example, there will be 4 column indexes being adjusted.  The
 375     # first one is always at zero.  The interior columns are at the start of
 376     # the matching tokens, and the last one tracks the maximum line length.
 377     #
 378     # Each time a new line comes in, it joins the current vertical
 379     # group if possible.  Otherwise it causes the current group to be flushed
 380     # and a new group is started.
 381     #
 382     # For each new group member, the column locations are increased, as
 383     # necessary, to make room for the new fields.  When the group is finally
 384     # output, these column numbers are used to compute the amount of spaces of
 385     # padding needed for each field.
 386     #
 387     # Programming note: the fields are assumed not to have any tab characters.
 388     # Tabs have been previously removed except for tabs in quoted strings and
 389     # side comments.  Tabs in these fields can mess up the column counting.
 390     # The log file warns the user if there are any such tabs.
 391
 392     my ( $self, $rline_hash ) = @_;
 393
 394     my $level                     = $rline_hash->{level};
 395     my $level_end                 = $rline_hash->{level_end};
 396     my $level_adj                 = $rline_hash->{level_adj};
 397     my $indentation               = $rline_hash->{indentation};
 398     my $list_seqno                = $rline_hash->{list_seqno};
 399     my $outdent_long_lines        = $rline_hash->{outdent_long_lines};
 400     my $is_terminal_ternary       = $rline_hash->{is_terminal_ternary};
 401     my $rvertical_tightness_flags = $rline_hash->{rvertical_tightness_flags};
 402     my $level_jump                = $rline_hash->{level_jump};
 403     my $rfields                   = $rline_hash->{rfields};
 404     my $rtokens                   = $rline_hash->{rtokens};
 405     my $rpatterns                 = $rline_hash->{rpatterns};
 406     my $rfield_lengths            = $rline_hash->{rfield_lengths};
 407     my $terminal_block_type       = $rline_hash->{terminal_block_type};
 408     my $batch_count               = $rline_hash->{batch_count};
 409     my $break_alignment_before    = $rline_hash->{break_alignment_before};
 410     my $break_alignment_after     = $rline_hash->{break_alignment_after};
 411     my $Kend                      = $rline_hash->{Kend};
 412     my $ci_level                  = $rline_hash->{ci_level};
 413
 414     # The index '$Kend' is a value which passed along with the line text to sub
 415     # 'write_code_line' for a convergence check.
 416
 417     # number of fields is $jmax
 418     # number of tokens between fields is $jmax-1
 419     my $jmax = @{$rfields} - 1;
 420
 421     my $leading_space_count = get_spaces($indentation);
 422
 423     # set outdented flag to be sure we either align within statements or
 424     # across statement boundaries, but not both.
 425     my $is_outdented =
 426       $self->[_last_leading_space_count_] > $leading_space_count;
 427     $self->[_last_leading_space_count_] = $leading_space_count;
 428
 429     # Identify a hanging side comment.  Hanging side comments have an empty
 430     # initial field.
 431     my $is_hanging_side_comment =
 432       ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
 433
 434     # Undo outdented flag for a hanging side comment
 435     $is_outdented = 0 if $is_hanging_side_comment;
 436
 437     # Identify a block comment.
 438     my $is_block_comment = $jmax == 0 && substr( $rfields->[0], 0, 1 ) eq '#';
 439
 440     # Block comment .. update count
 441     if ($is_block_comment) {
 442         $self->[_consecutive_block_comments_]++;
 443     }
 444
 445     # Not a block comment ..
 446     # Forget side comment column if we saw 2 or more block comments,
 447     # and reset the count
 448     else {
 449
 450         if ( $self->[_consecutive_block_comments_] > 1 ) {
 451             $self->forget_side_comment();
 452         }
 453         $self->[_consecutive_block_comments_] = 0;
 454     }
 455
 456     # Reset side comment location if we are entering a new block from level 0.
 457     # This is intended to keep them from drifting too far to the right.
 458     if ( $terminal_block_type && $level_adj == 0 && $level_end > $level ) {
 459         $self->forget_side_comment();
 460     }
 461
 462     my $group_level = $self->[_group_level_];
 463
 464     DEBUG_VALIGN && do {
 465         my $nlines = $self->group_line_count();
 466         print STDOUT
 467 "Entering valign_input: lines=$nlines new #fields= $jmax, leading_count=$leading_space_count, level_jump=$level_jump, level=$level, group_level=$group_level, level_jump=$level_jump\n";
 468     };
 469
 470     # Validate cached line if necessary: If we can produce a container
 471     # with just 2 lines total by combining an existing cached opening
 472     # token with the closing token to follow, then we will mark both
 473     # cached flags as valid.
 474     my $cached_line_type = get_cached_line_type();
 475     if ($cached_line_type) {
 476         my $cached_line_flag = get_cached_line_flag();
 477         if ($rvertical_tightness_flags) {
 478             my $cached_seqno = get_cached_seqno();
 479             if (   $cached_seqno
 480                 && $self->group_line_count() <= 1
 481                 && $rvertical_tightness_flags->[2]
 482                 && $rvertical_tightness_flags->[2] == $cached_seqno )
 483             {
 484                 $rvertical_tightness_flags->[3] ||= 1;
 485                 set_cached_line_valid(1);
 486             }
 487         }
 488
 489         # do not join an opening block brace with an unbalanced line
 490         # unless requested with a flag value of 2
 491         if (   $cached_line_type == 3
 492             && !$self->group_line_count()
 493             && $cached_line_flag < 2
 494             && $level_jump != 0 )
 495         {
 496             set_cached_line_valid(0);
 497         }
 498     }
 499
 500     # shouldn't happen:
 501     if ( $level < 0 ) { $level = 0 }
 502
 503     # do not align code across indentation level changes
 504     # or if vertical alignment is turned off for debugging
 505     if ( $level != $group_level || $is_outdented || !$self->[_rOpts_valign_] ) {
 506
 507         $self->_flush_group_lines( $level - $group_level );
 508
 509         $group_level = $level;
 510         $self->[_group_level_] = $group_level;
 511
 512         # wait until after the above flush to get the leading space
 513         # count because it may have been changed if the -icp flag is in
 514         # effect
 515         $leading_space_count = get_spaces($indentation);
 516
 517     }
 518
 519     # --------------------------------------------------------------------
 520     # Collect outdentable block COMMENTS
 521     # --------------------------------------------------------------------
 522     my $is_blank_line = "";
 523     if ( $self->[_group_type_] eq 'COMMENT' ) {
 524         if (
 525             (
 526                    $is_block_comment
 527                 && $outdent_long_lines
 528                 && $leading_space_count ==
 529                 $self->[_comment_leading_space_count_]
 530             )
 531             || $is_blank_line
 532           )
 533         {
 534
 535             # Note that for a comment group we are not storing a line
 536             # but rather just the text and its length.
 537             $self->push_group_line(
 538                 [ $rfields->[0], $rfield_lengths->[0], $Kend ] );
 539             return;
 540         }
 541         else {
 542             $self->_flush_group_lines();
 543         }
 544     }
 545
 546     my $rgroup_lines = $self->[_rgroup_lines_];
 547     if ( $break_alignment_before && @{$rgroup_lines} ) {
 548         $rgroup_lines->[-1]->set_end_group(1);
 549     }
 550
 551     # --------------------------------------------------------------------
 552     # add dummy fields for terminal ternary
 553     # --------------------------------------------------------------------
 554     my $j_terminal_match;
 555
 556     if ( $is_terminal_ternary && @{$rgroup_lines} ) {
 557         $j_terminal_match =
 558           fix_terminal_ternary( $rgroup_lines->[-1], $rfields, $rtokens,
 559             $rpatterns, $rfield_lengths, $group_level, );
 560         $jmax = @{$rfields} - 1;
 561     }
 562
 563     # --------------------------------------------------------------------
 564     # add dummy fields for else statement
 565     # --------------------------------------------------------------------
 566
 567     # Note the trailing space after 'else' here. If there were no space between
 568     # the else and the next '{' then we would not be able to do vertical
 569     # alignment of the '{'.
 570     if (   $rfields->[0] eq 'else '
 571         && @{$rgroup_lines}
 572         && $level_jump == 0 )
 573     {
 574
 575         $j_terminal_match =
 576           fix_terminal_else( $rgroup_lines->[-1], $rfields, $rtokens,
 577             $rpatterns, $rfield_lengths );
 578         $jmax = @{$rfields} - 1;
 579     }
 580
 581     # --------------------------------------------------------------------
 582     # Handle simple line of code with no fields to match.
 583     # --------------------------------------------------------------------
 584     if ( $jmax <= 0 ) {
 585         $self->[_zero_count_]++;
 586
 587         if ( @{$rgroup_lines}
 588             && !get_recoverable_spaces( $rgroup_lines->[0]->get_indentation() )
 589           )
 590         {
 591
 592             # flush the current group if it has some aligned columns..
 593             # or we haven't seen a comment lately
 594             if (   $rgroup_lines->[0]->get_jmax() > 1
 595                 || $self->[_zero_count_] > 3 )
 596             {
 597                 $self->_flush_group_lines();
 598             }
 599         }
 600
 601         # start new COMMENT group if this comment may be outdented
 602         if (   $is_block_comment
 603             && $outdent_long_lines
 604             && !$self->group_line_count() )
 605         {
 606             $self->[_group_type_]                  = 'COMMENT';
 607             $self->[_comment_leading_space_count_] = $leading_space_count;
 608             $self->push_group_line(
 609                 [ $rfields->[0], $rfield_lengths->[0], $Kend ] );
 610             return;
 611         }
 612
 613         # just write this line directly if no current group, no side comment,
 614         # and no space recovery is needed.
 615         if (   !$self->group_line_count()
 616             && !get_recoverable_spaces($indentation) )
 617         {
 618
 619             $self->valign_output_step_B(
 620                 {
 621                     leading_space_count       => $leading_space_count,
 622                     line                      => $rfields->[0],
 623                     line_length               => $rfield_lengths->[0],
 624                     side_comment_length       => 0,
 625                     outdent_long_lines        => $outdent_long_lines,
 626                     rvertical_tightness_flags => $rvertical_tightness_flags,
 627                     level                     => $level,
 628                     level_end                 => $level_end,
 629                     Kend                      => $Kend,
 630                 }
 631             );
 632
 633             return;
 634         }
 635     }
 636     else {
 637         $self->[_zero_count_] = 0;
 638     }
 639
 640     my $maximum_line_length_for_level =
 641       $self->maximum_line_length_for_level($level);
 642
 643     # --------------------------------------------------------------------
 644     # It simplifies things to create a zero length side comment
 645     # if none exists.
 646     # --------------------------------------------------------------------
 647     $self->make_side_comment( $rtokens, $rfields, $rpatterns, $rfield_lengths );
 648     $jmax = @{$rfields} - 1;
 649
 650     # --------------------------------------------------------------------
 651     # create an object to hold this line
 652     # --------------------------------------------------------------------
 653     my $new_line = Perl::Tidy::VerticalAligner::Line->new(
 654         {
 655             jmax                      => $jmax,
 656             rtokens                   => $rtokens,
 657             rfields                   => $rfields,
 658             rpatterns                 => $rpatterns,
 659             rfield_lengths            => $rfield_lengths,
 660             indentation               => $indentation,
 661             leading_space_count       => $leading_space_count,
 662             outdent_long_lines        => $outdent_long_lines,
 663             list_seqno                => $list_seqno,
 664             list_type                 => "",
 665             is_hanging_side_comment   => $is_hanging_side_comment,
 666             maximum_line_length       => $maximum_line_length_for_level,
 667             rvertical_tightness_flags => $rvertical_tightness_flags,
 668             is_terminal_ternary       => $is_terminal_ternary,
 669             j_terminal_match          => $j_terminal_match,
 670             end_group                 => $break_alignment_after,
 671             Kend                      => $Kend,
 672             ci_level                  => $ci_level,
 673             level                     => $level,
 674             level_end                 => $level_end,
 675             imax_pair                 => -1,
 676         }
 677     );
 678
 679     # --------------------------------------------------------------------
 680     # Decide if this is a simple list of items.
 681     # We use this to be less restrictive in deciding what to align.
 682     # --------------------------------------------------------------------
 683     decide_if_list($new_line) if ($list_seqno);
 684
 685     # --------------------------------------------------------------------
 686     # Append this line to the current group (or start new group)
 687     # --------------------------------------------------------------------
 688
 689     $self->push_group_line($new_line);
 690
 691     # output this group if it ends in a terminal else or ternary line
 692     if ( defined($j_terminal_match) ) {
 693         $self->_flush_group_lines();
 694     }
 695
 696     # Force break after jump to lower level
 697     if ( $level_jump < 0 ) {
 698         $self->_flush_group_lines($level_jump);
 699     }
 700
 701     # --------------------------------------------------------------------
 702     # Some old debugging stuff
 703     # --------------------------------------------------------------------
 704     DEBUG_VALIGN && do {
 705         print STDOUT "exiting valign_input fields:";
 706         dump_array( @{$rfields} );
 707         print STDOUT "exiting valign_input tokens:";
 708         dump_array( @{$rtokens} );
 709         print STDOUT "exiting valign_input patterns:";
 710         dump_array( @{$rpatterns} );
 711     };
 712
 713     return;
 714 }
 715
 716 sub join_hanging_comment {
 717
 718     # Add dummy fields to a hanging side comment to make it look
 719     # like the first line in its potential group.  This simplifies
 720     # the coding.
 721     my ( $new_line, $old_line ) = @_;
 722
 723     my $jmax = $new_line->get_jmax();
 724
 725     # must be 2 fields
 726     return 0 unless $jmax == 1;
 727     my $rtokens = $new_line->get_rtokens();
 728
 729     # the second field must be a comment
 730     return 0 unless $rtokens->[0] eq '#';
 731     my $rfields = $new_line->get_rfields();
 732
 733     # the first field must be empty
 734     return 0 unless $rfields->[0] =~ /^\s*$/;
 735
 736     # the current line must have fewer fields
 737     my $maximum_field_index = $old_line->get_jmax();
 738     return 0
 739       unless $maximum_field_index > $jmax;
 740
 741     # looks ok..
 742     my $rpatterns      = $new_line->get_rpatterns();
 743     my $rfield_lengths = $new_line->get_rfield_lengths();
 744
 745     $new_line->set_is_hanging_side_comment(1);
 746     $jmax = $maximum_field_index;
 747     $new_line->set_jmax($jmax);
 748     $rfields->[$jmax]         = $rfields->[1];
 749     $rfield_lengths->[$jmax]  = $rfield_lengths->[1];
 750     $rtokens->[ $jmax - 1 ]   = $rtokens->[0];
 751     $rpatterns->[ $jmax - 1 ] = $rpatterns->[0];
 752     foreach my $j ( 1 .. $jmax - 1 ) {
 753         $rfields->[$j]         = '';
 754         $rfield_lengths->[$j]  = 0;
 755         $rtokens->[ $j - 1 ]   = "";
 756         $rpatterns->[ $j - 1 ] = "";
 757     }
 758     return 1;
 759 }
 760
 761 sub make_side_comment {
 762
 763     # create an empty side comment if none exists
 764
 765     my ( $self, $rtokens, $rfields, $rpatterns, $rfield_lengths ) = @_;
 766
 767     my $jmax = @{$rfields} - 1;
 768
 769     # if line does not have a side comment...
 770     if ( ( $jmax == 0 ) || ( $rtokens->[ $jmax - 1 ] ne '#' ) ) {
 771         $jmax += 1;
 772         $rtokens->[ $jmax - 1 ]  = '#';
 773         $rfields->[$jmax]        = '';
 774         $rfield_lengths->[$jmax] = 0;
 775         $rpatterns->[$jmax]      = '#';
 776     }
 777     return;
 778 }
 779
 780 {    ## closure for sub decide_if_list
 781
 782     my %is_comma_token;
 783
 784     BEGIN {
 785
 786         my @q = qw( => );
 787         push @q, ',';
 788         @is_comma_token{@q} = (1) x scalar(@q);
 789     }
 790
 791     sub decide_if_list {
 792
 793         my $line = shift;
 794
 795         # A list will be taken to be a line with a forced break in which all
 796         # of the field separators are commas or comma-arrows (except for the
 797         # trailing #)
 798
 799         my $rtokens    = $line->get_rtokens();
 800         my $test_token = $rtokens->[0];
 801         my ( $raw_tok, $lev, $tag, $tok_count ) =
 802           decode_alignment_token($test_token);
 803         if ( $is_comma_token{$raw_tok} ) {
 804             my $list_type = $test_token;
 805             my $jmax      = $line->get_jmax();
 806
 807             foreach ( 1 .. $jmax - 2 ) {
 808                 ( $raw_tok, $lev, $tag, $tok_count ) =
 809                   decode_alignment_token( $rtokens->[$_] );
 810                 if ( !$is_comma_token{$raw_tok} ) {
 811                     $list_type = "";
 812                     last;
 813                 }
 814             }
 815             $line->set_list_type($list_type);
 816         }
 817         return;
 818     }
 819 }
 820
 821 sub fix_terminal_ternary {
 822
 823     # Add empty fields as necessary to align a ternary term
 824     # like this:
 825     #
 826     #  my $leapyear =
 827     #      $year % 4   ? 0
 828     #    : $year % 100 ? 1
 829     #    : $year % 400 ? 0
 830     #    :               1;
 831     #
 832     # returns the index of the terminal question token, if any
 833
 834     my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths,
 835         $group_level )
 836       = @_;
 837
 838     return unless ($old_line);
 839     use constant EXPLAIN_TERNARY => 0;
 840
 841     my $jmax        = @{$rfields} - 1;
 842     my $rfields_old = $old_line->get_rfields();
 843
 844     my $rpatterns_old       = $old_line->get_rpatterns();
 845     my $rtokens_old         = $old_line->get_rtokens();
 846     my $maximum_field_index = $old_line->get_jmax();
 847
 848     # look for the question mark after the :
 849     my ($jquestion);
 850     my $depth_question;
 851     my $pad        = "";
 852     my $pad_length = 0;
 853     foreach my $j ( 0 .. $maximum_field_index - 1 ) {
 854         my $tok = $rtokens_old->[$j];
 855         my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
 856         if ( $raw_tok eq '?' ) {
 857             $depth_question = $lev;
 858
 859             # depth must be correct
 860             next unless ( $depth_question eq $group_level );
 861
 862             $jquestion = $j;
 863             if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
 864                 $pad_length = length($1);
 865                 $pad        = " " x $pad_length;
 866             }
 867             else {
 868                 return;    # shouldn't happen
 869             }
 870             last;
 871         }
 872     }
 873     return unless ( defined($jquestion) );    # shouldn't happen
 874
 875     # Now splice the tokens and patterns of the previous line
 876     # into the else line to insure a match.  Add empty fields
 877     # as necessary.
 878     my $jadd = $jquestion;
 879
 880     # Work on copies of the actual arrays in case we have
 881     # to return due to an error
 882     my @fields        = @{$rfields};
 883     my @patterns      = @{$rpatterns};
 884     my @tokens        = @{$rtokens};
 885     my @field_lengths = @{$rfield_lengths};
 886
 887     EXPLAIN_TERNARY && do {
 888         local $" = '><';
 889         print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n";
 890         print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n";
 891         print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
 892         print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n";
 893         print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n";
 894         print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
 895     };
 896
 897     # handle cases of leading colon on this line
 898     if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
 899
 900         my ( $colon, $therest ) = ( $1, $2 );
 901
 902         # Handle sub-case of first field with leading colon plus additional code
 903         # This is the usual situation as at the '1' below:
 904         #  ...
 905         #  : $year % 400 ? 0
 906         #  :               1;
 907         if ($therest) {
 908
 909             # Split the first field after the leading colon and insert padding.
 910             # Note that this padding will remain even if the terminal value goes
 911             # out on a separate line.  This does not seem to look to bad, so no
 912             # mechanism has been included to undo it.
 913             my $field1        = shift @fields;
 914             my $field_length1 = shift @field_lengths;
 915             my $len_colon     = length($colon);
 916             unshift @fields, ( $colon, $pad . $therest );
 917             unshift @field_lengths,
 918               ( $len_colon, $pad_length + $field_length1 - $len_colon );
 919
 920             # change the leading pattern from : to ?
 921             return unless ( $patterns[0] =~ s/^\:/?/ );
 922
 923             # install leading tokens and patterns of existing line
 924             unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
 925             unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
 926
 927             # insert appropriate number of empty fields
 928             splice( @fields,        1, 0, ('') x $jadd ) if $jadd;
 929             splice( @field_lengths, 1, 0, (0) x $jadd )  if $jadd;
 930         }
 931
 932         # handle sub-case of first field just equal to leading colon.
 933         # This can happen for example in the example below where
 934         # the leading '(' would create a new alignment token
 935         # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
 936         # :                        ( $mname = $name . '->' );
 937         else {
 938
 939             return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
 940
 941             # prepend a leading ? onto the second pattern
 942             $patterns[1] = "?b" . $patterns[1];
 943
 944             # pad the second field
 945             $fields[1]        = $pad . $fields[1];
 946             $field_lengths[1] = $pad_length + $field_lengths[1];
 947
 948             # install leading tokens and patterns of existing line, replacing
 949             # leading token and inserting appropriate number of empty fields
 950             splice( @tokens,   0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
 951             splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
 952             splice( @fields,        1, 0, ('') x $jadd ) if $jadd;
 953             splice( @field_lengths, 1, 0, (0) x $jadd )  if $jadd;
 954         }
 955     }
 956
 957     # Handle case of no leading colon on this line.  This will
 958     # be the case when -wba=':' is used.  For example,
 959     #  $year % 400 ? 0 :
 960     #                1;
 961     else {
 962
 963         # install leading tokens and patterns of existing line
 964         $patterns[0] = '?' . 'b' . $patterns[0];
 965         unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
 966         unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
 967
 968         # insert appropriate number of empty fields
 969         $jadd             = $jquestion + 1;
 970         $fields[0]        = $pad . $fields[0];
 971         $field_lengths[0] = $pad_length + $field_lengths[0];
 972         splice( @fields,        0, 0, ('') x $jadd ) if $jadd;
 973         splice( @field_lengths, 0, 0, (0) x $jadd )  if $jadd;
 974     }
 975
 976     EXPLAIN_TERNARY && do {
 977         local $" = '><';
 978         print STDOUT "MODIFIED TOKENS=<@tokens>\n";
 979         print STDOUT "MODIFIED PATTERNS=<@patterns>\n";
 980         print STDOUT "MODIFIED FIELDS=<@fields>\n";
 981     };
 982
 983     # all ok .. update the arrays
 984     @{$rfields}        = @fields;
 985     @{$rtokens}        = @tokens;
 986     @{$rpatterns}      = @patterns;
 987     @{$rfield_lengths} = @field_lengths;
 988
 989     # force a flush after this line
 990     return $jquestion;
 991 }
 992
 993 sub fix_terminal_else {
 994
 995     # Add empty fields as necessary to align a balanced terminal
 996     # else block to a previous if/elsif/unless block,
 997     # like this:
 998     #
 999     #  if   ( 1 || $x ) { print "ok 13\n"; }
1000     #  else             { print "not ok 13\n"; }
1001     #
1002     # returns a positive value if the else block should be indented
1003     #
1004     my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths ) = @_;
1005
1006     return unless ($old_line);
1007     my $jmax = @{$rfields} - 1;
1008     return unless ( $jmax > 0 );
1009
1010     # check for balanced else block following if/elsif/unless
1011     my $rfields_old = $old_line->get_rfields();
1012
1013     # TBD: add handling for 'case'
1014     return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
1015
1016     # look for the opening brace after the else, and extract the depth
1017     my $tok_brace = $rtokens->[0];
1018     my $depth_brace;
1019     if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
1020
1021     # probably:  "else # side_comment"
1022     else { return }
1023
1024     my $rpatterns_old       = $old_line->get_rpatterns();
1025     my $rtokens_old         = $old_line->get_rtokens();
1026     my $maximum_field_index = $old_line->get_jmax();
1027
1028     # be sure the previous if/elsif is followed by an opening paren
1029     my $jparen    = 0;
1030     my $tok_paren = '(' . $depth_brace;
1031     my $tok_test  = $rtokens_old->[$jparen];
1032     return unless ( $tok_test eq $tok_paren );    # shouldn't happen
1033
1034     # Now find the opening block brace
1035     my ($jbrace);
1036     foreach my $j ( 1 .. $maximum_field_index - 1 ) {
1037         my $tok = $rtokens_old->[$j];
1038         if ( $tok eq $tok_brace ) {
1039             $jbrace = $j;
1040             last;
1041         }
1042     }
1043     return unless ( defined($jbrace) );           # shouldn't happen
1044
1045     # Now splice the tokens and patterns of the previous line
1046     # into the else line to insure a match.  Add empty fields
1047     # as necessary.
1048     my $jadd = $jbrace - $jparen;
1049     splice( @{$rtokens},   0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
1050     splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
1051     splice( @{$rfields},        1, 0, ('') x $jadd );
1052     splice( @{$rfield_lengths}, 1, 0, (0) x $jadd );
1053
1054     # force a flush after this line if it does not follow a case
1055     if   ( $rfields_old->[0] =~ /^case\s*$/ ) { return }
1056     else                                      { return $jbrace }
1057 }
1058
1059 my %is_closing_block_type;
1060
1061 BEGIN {
1062     @_ = qw< } ] >;
1063     @is_closing_block_type{@_} = (1) x scalar(@_);
1064 }
1065
1066 sub check_match {
1067
1068     # See if the current line matches the current vertical alignment group.
1069
1070     my ( $self, $new_line, $base_line, $prev_line ) = @_;
1071
1072     # Given:
1073     #  $new_line  = the line being considered for group inclusion
1074     #  $base_line = the first line of the current group
1075     #  $prev_line = the line just before $new_line
1076
1077     # returns a flag and a value as follows:
1078     #    return (0, $imax_align)     if the line does not match
1079     #    return (1, $imax_align)     if the line matches but does not fit
1080     #    return (2, $imax_align)     if the line matches and fits
1081
1082     # Returns '$imax_align' which is the index of the maximum matching token.
1083     # It will be used in the subsequent left-to-right sweep to align as many
1084     # tokens as possible for lines which partially match.
1085     my $imax_align = -1;
1086
1087     # variable $GoToMsg explains reason for no match, for debugging
1088     my $GoToMsg = "";
1089     use constant EXPLAIN_CHECK_MATCH => 0;
1090
1091     # This is a flag for testing alignment by sub sweep_left_to_right only.
1092     # This test can help find problems with the alignment logic.
1093     # This flag should normally be zero.
1094     use constant TEST_SWEEP_ONLY => 0;
1095
1096     my $jmax                = $new_line->get_jmax();
1097     my $maximum_field_index = $base_line->get_jmax();
1098
1099     my $jlimit = $jmax - 2;
1100     if ( $jmax > $maximum_field_index ) {
1101         $jlimit = $maximum_field_index - 2;
1102     }
1103
1104     if ( $new_line->get_is_hanging_side_comment() ) {
1105
1106         # HSC's can join the group if they fit
1107     }
1108
1109     # Everything else
1110     else {
1111
1112         # A group with hanging side comments ends with the first non hanging
1113         # side comment.
1114         if ( $base_line->get_is_hanging_side_comment() ) {
1115             $GoToMsg = "end of hanging side comments";
1116             goto NO_MATCH;
1117         }
1118
1119         # The number of tokens that this line shares with the previous line
1120         # has been stored with the previous line.  This value was calculated
1121         # and stored by sub 'match_line_pair'.
1122         $imax_align = $prev_line->get_imax_pair();
1123
1124         if ( $imax_align != $jlimit ) {
1125             $GoToMsg = "Not all tokens match: $imax_align != $jlimit\n";
1126             goto NO_MATCH;
1127         }
1128
1129     }
1130
1131     # The tokens match, but the lines must have identical number of
1132     # tokens to join the group.
1133     if ( $maximum_field_index != $jmax ) {
1134         $GoToMsg = "token count differs";
1135         goto NO_MATCH;
1136     }
1137
1138     # The tokens match. Now See if there is space for this line in the
1139     # current group.
1140     if ( $self->check_fit( $new_line, $base_line ) && !TEST_SWEEP_ONLY ) {
1141
1142         EXPLAIN_CHECK_MATCH
1143           && print "match and fit, imax_align=$imax_align, jmax=$jmax\n";
1144         return ( 2, $jlimit );
1145     }
1146     else {
1147
1148         EXPLAIN_CHECK_MATCH
1149           && print "match but no fit, imax_align=$imax_align, jmax=$jmax\n";
1150         return ( 1, $jlimit );
1151     }
1152
1153   NO_MATCH:
1154
1155     EXPLAIN_CHECK_MATCH
1156       && print
1157       "no match because $GoToMsg, max match index =i $imax_align, jmax=$jmax\n";
1158
1159     return ( 0, $imax_align );
1160 }
1161
1162 sub check_fit {
1163
1164     my ( $self, $new_line, $old_line ) = @_;
1165
1166     # The new line has alignments identical to the current group. Now we have
1167     # to fit the new line into the group without causing a field to exceed the
1168     # line length limit.
1169     #   return true if successful
1170     #   return false if not successful
1171
1172     my $jmax                = $new_line->get_jmax();
1173     my $leading_space_count = $new_line->get_leading_space_count();
1174     my $rfield_lengths      = $new_line->get_rfield_lengths();
1175     my $padding_available   = $old_line->get_available_space_on_right();
1176     my $jmax_old            = $old_line->get_jmax();
1177
1178     # Safety check ... only lines with equal array sizes should arrive here
1179     # from sub check_match.  So if this error occurs, look at recent changes in
1180     # sub check_match.  It is only supposed to check the fit of lines with
1181     # identical numbers of alignment tokens.
1182     if ( $jmax_old ne $jmax ) {
1183
1184         $self->warning(<<EOM);
1185 Program bug detected in Perl::Tidy::VerticalAligner sub check_fit
1186 unexpected difference in array lengths: $jmax != $jmax_old
1187 EOM
1188         return;
1189     }
1190
1191     # Save current columns in case this line does not fit.
1192     my @alignments = $old_line->get_alignments();
1193     foreach my $alignment (@alignments) {
1194         $alignment->save_column();
1195     }
1196
1197     my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
1198
1199     # Loop over all alignments ...
1200     my $maximum_field_index = $old_line->get_jmax();
1201     for my $j ( 0 .. $jmax ) {
1202
1203         my $pad = $rfield_lengths->[$j] - $old_line->current_field_width($j);
1204
1205         if ( $j == 0 ) {
1206             $pad += $leading_space_count;
1207         }
1208
1209         # Keep going if this field does not need any space.
1210         next if $pad < 0;
1211
1212         # See if it needs too much space.
1213         if ( $pad > $padding_available ) {
1214
1215             ################################################
1216             # Line does not fit -- revert to starting state
1217             ################################################
1218             foreach my $alignment (@alignments) {
1219                 $alignment->restore_column();
1220             }
1221             return;
1222         }
1223
1224         # make room for this field
1225         $old_line->increase_field_width( $j, $pad );
1226         $padding_available -= $pad;
1227     }
1228
1229     ######################################
1230     # The line fits, the match is accepted
1231     ######################################
1232     return 1;
1233
1234 }
1235
1236 sub install_new_alignments {
1237
1238     my ($new_line) = @_;
1239
1240     my $jmax           = $new_line->get_jmax();
1241     my $rfield_lengths = $new_line->get_rfield_lengths();
1242     my $col            = $new_line->get_leading_space_count();
1243
1244     for my $j ( 0 .. $jmax ) {
1245         $col += $rfield_lengths->[$j];
1246
1247         # create initial alignments for the new group
1248         my $alignment =
1249           Perl::Tidy::VerticalAligner::Alignment->new( { column => $col } );
1250         $new_line->set_alignment( $j, $alignment );
1251     }
1252     return;
1253 }
1254
1255 sub copy_old_alignments {
1256     my ( $new_line, $old_line ) = @_;
1257     my @new_alignments = $old_line->get_alignments();
1258     $new_line->set_alignments(@new_alignments);
1259     return;
1260 }
1261
1262 sub dump_array {
1263
1264     # debug routine to dump array contents
1265     local $" = ')(';
1266     print STDOUT "(@_)\n";
1267     return;
1268 }
1269
1270 sub level_change {
1271
1272     # compute decrease in level when we remove $diff spaces from the
1273     # leading spaces
1274     my ( $self, $leading_space_count, $diff, $level ) = @_;
1275
1276     my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
1277     if ($rOpts_indent_columns) {
1278         my $olev =
1279           int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
1280         my $nlev = int( $leading_space_count / $rOpts_indent_columns );
1281         $level -= ( $olev - $nlev );
1282         if ( $level < 0 ) { $level = 0 }
1283     }
1284     return $level;
1285 }
1286
1287 ###############################################
1288 # CODE SECTION 4: Code to process comment lines
1289 ###############################################
1290
1291 sub _flush_comment_lines {
1292
1293     # Output a group consisting of COMMENT lines
1294
1295     my ($self) = @_;
1296     my $rgroup_lines = $self->[_rgroup_lines_];
1297     return unless ( @{$rgroup_lines} );
1298     my $group_level         = $self->[_group_level_];
1299     my $leading_space_count = $self->[_comment_leading_space_count_];
1300     my $leading_string =
1301       $self->get_leading_string( $leading_space_count, $group_level );
1302
1303     # look for excessively long lines
1304     my $max_excess = 0;
1305     foreach my $item ( @{$rgroup_lines} ) {
1306         my ( $str, $str_len ) = @{$item};
1307         my $excess =
1308           $str_len +
1309           $leading_space_count -
1310           $self->maximum_line_length_for_level($group_level);
1311         if ( $excess > $max_excess ) {
1312             $max_excess = $excess;
1313         }
1314     }
1315
1316     # zero leading space count if any lines are too long
1317     if ( $max_excess > 0 ) {
1318         $leading_space_count -= $max_excess;
1319         if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
1320         my $file_writer_object = $self->[_file_writer_object_];
1321         my $last_outdented_line_at =
1322           $file_writer_object->get_output_line_number();
1323         $self->[_last_outdented_line_at_] = $last_outdented_line_at;
1324         my $outdented_line_count = $self->[_outdented_line_count_];
1325         unless ($outdented_line_count) {
1326             $self->[_first_outdented_line_at_] = $last_outdented_line_at;
1327         }
1328         my $nlines = @{$rgroup_lines};
1329         $outdented_line_count += $nlines;
1330         $self->[_outdented_line_count_] = $outdented_line_count;
1331     }
1332
1333     # write the lines
1334     my $outdent_long_lines = 0;
1335
1336     foreach my $item ( @{$rgroup_lines} ) {
1337         my ( $str, $str_len, $Kend ) = @{$item};
1338         $self->valign_output_step_B(
1339             {
1340                 leading_space_count       => $leading_space_count,
1341                 line                      => $str,
1342                 line_length               => $str_len,
1343                 side_comment_length       => 0,
1344                 outdent_long_lines        => $outdent_long_lines,
1345                 rvertical_tightness_flags => "",
1346                 level                     => $group_level,
1347                 level_end                 => $group_level,
1348                 Kend                      => $Kend,
1349             }
1350         );
1351     }
1352
1353     $self->initialize_for_new_group();
1354     return;
1355 }
1356
1357 ######################################################
1358 # CODE SECTION 5: Code to process groups of code lines
1359 ######################################################
1360
1361 sub _flush_group_lines {
1362
1363     # This is the vertical aligner internal flush, which leaves the cache
1364     # intact
1365     my ( $self, $level_jump ) = @_;
1366
1367     # $level_jump = $next_level-$group_level, if known
1368     #             = undef if not known
1369
1370     my $rgroup_lines = $self->[_rgroup_lines_];
1371     return unless ( @{$rgroup_lines} );
1372     my $group_type  = $self->[_group_type_];
1373     my $group_level = $self->[_group_level_];
1374
1375     # Debug
1376     0 && do {
1377         my ( $a, $b, $c ) = caller();
1378         my $nlines = @{$rgroup_lines};
1379         print STDOUT
1380 "APPEND0: _flush_group_lines called from $a $b $c lines=$nlines, type=$group_type \n";
1381     };
1382
1383     ############################################
1384     # Section 1: Handle a group of COMMENT lines
1385     ############################################
1386     if ( $group_type eq 'COMMENT' ) {
1387         $self->_flush_comment_lines();
1388         return;
1389     }
1390
1391     #########################################################################
1392     # Section 2: Handle line(s) of CODE.  Most of the actual work of vertical
1393     # aligning happens here in the following steps:
1394     #########################################################################
1395
1396     # STEP 1: Remove most unmatched tokens. They block good alignments.
1397     my ( $max_lev_diff, $saw_side_comment ) =
1398       delete_unmatched_tokens( $rgroup_lines, $group_level );
1399
1400     # STEP 2: Sweep top to bottom, forming subgroups of lines with exactly
1401     # matching common alignments.  The indexes of these subgroups are in the
1402     # return variable.
1403     my $rgroups = $self->sweep_top_down( $rgroup_lines, $group_level );
1404
1405     # STEP 3: Sweep left to right through the lines, looking for leading
1406     # alignment tokens shared by groups.
1407     sweep_left_to_right( $rgroup_lines, $rgroups, $group_level )
1408       if ( @{$rgroups} > 1 );
1409
1410     # STEP 4: Move side comments to a common column if possible.
1411     if ($saw_side_comment) {
1412         $self->align_side_comments( $rgroup_lines, $rgroups );
1413     }
1414
1415     # STEP 5: For the -lp option, increase the indentation of lists
1416     # to the desired amount, but do not exceed the line length limit.
1417
1418     # We are allowed to shift a group of lines to the right if:
1419     #  (1) its level is greater than the level of the previous group, and
1420     #  (2) its level is greater than the level of the next line to be written.
1421
1422     my $extra_indent_ok;
1423     if ( $group_level > $self->[_last_level_written_] ) {
1424
1425         # Use the level jump to next line to come, if given
1426         if ( defined($level_jump) ) {
1427             $extra_indent_ok = $level_jump < 0;
1428         }
1429
1430         # Otherwise, assume the next line has the level of the end of last line.
1431         # This fixes case c008.
1432         else {
1433             my $level_end = $rgroup_lines->[-1]->get_level_end();
1434             $extra_indent_ok = $group_level > $level_end;
1435         }
1436     }
1437
1438     my $extra_leading_spaces =
1439       $extra_indent_ok
1440       ? get_extra_leading_spaces( $rgroup_lines, $rgroups )
1441       : 0;
1442
1443     # STEP 6: Output the lines.
1444     # All lines in this batch have the same basic leading spacing:
1445     my $group_leader_length = $rgroup_lines->[0]->get_leading_space_count();
1446
1447     foreach my $line ( @{$rgroup_lines} ) {
1448         $self->valign_output_step_A(
1449             {
1450                 line                 => $line,
1451                 min_ci_gap           => 0,
1452                 do_not_align         => 0,
1453                 group_leader_length  => $group_leader_length,
1454                 extra_leading_spaces => $extra_leading_spaces,
1455                 level                => $group_level,
1456             }
1457         );
1458     }
1459
1460     $self->initialize_for_new_group();
1461     return;
1462 }
1463
1464 {    ## closure for sub sweep_top_down
1465
1466     my $rall_lines;         # all of the lines
1467     my $grp_level;          # level of all lines
1468     my $rgroups;            # describes the partition of lines we will make here
1469     my $group_line_count;   # number of lines in current partition
1470
1471     BEGIN { $rgroups = [] }
1472
1473     sub initialize_for_new_rgroup {
1474         $group_line_count = 0;
1475         return;
1476     }
1477
1478     sub add_to_rgroup {
1479
1480         my ($jend) = @_;
1481         my $rline = $rall_lines->[$jend];
1482
1483         my $jbeg = $jend;
1484         if ( $group_line_count == 0 ) {
1485             install_new_alignments($rline);
1486         }
1487         else {
1488             my $rvals = pop @{$rgroups};
1489             $jbeg = $rvals->[0];
1490             copy_old_alignments( $rline, $rall_lines->[$jbeg] );
1491         }
1492         push @{$rgroups}, [ $jbeg, $jend, undef ];
1493         $group_line_count++;
1494         return;
1495     }
1496
1497     sub get_rgroup_jrange {
1498
1499         return unless @{$rgroups};
1500         return unless ( $group_line_count > 0 );
1501         my ( $jbeg, $jend ) = @{ $rgroups->[-1] };
1502         return ( $jbeg, $jend );
1503     }
1504
1505     sub end_rgroup {
1506
1507         my ($imax_align) = @_;
1508         return unless @{$rgroups};
1509         return unless ( $group_line_count > 0 );
1510
1511         my ( $jbeg, $jend ) = @{ pop @{$rgroups} };
1512         push @{$rgroups}, [ $jbeg, $jend, $imax_align ];
1513
1514         # Undo some alignments of poor two-line combinations.
1515         # We had to wait until now to know the line count.
1516         if ( $jend - $jbeg == 1 ) {
1517             my $line_0 = $rall_lines->[$jbeg];
1518             my $line_1 = $rall_lines->[$jend];
1519
1520             my $imax_pair = $line_1->get_imax_pair();
1521             if ( $imax_pair > $imax_align ) { $imax_align = $imax_pair }
1522
1523             ## flag for possible future use:
1524             ## my $is_isolated_pair = $imax_pair < 0
1525             ##  && ( $jbeg == 0
1526             ##    || $rall_lines->[ $jbeg - 1 ]->get_imax_pair() < 0 );
1527
1528             my $imax_prev =
1529               $jbeg > 0 ? $rall_lines->[ $jbeg - 1 ]->get_imax_pair() : -1;
1530
1531             my ( $is_marginal, $imax_align_fix ) =
1532               is_marginal_match( $line_0, $line_1, $grp_level, $imax_align,
1533                 $imax_prev );
1534             if ($is_marginal) {
1535                 combine_fields( $line_0, $line_1, $imax_align_fix );
1536             }
1537         }
1538
1539         initialize_for_new_rgroup();
1540         return;
1541     }
1542
1543     sub block_penultimate_match {
1544
1545         # emergency reset to prevent sweep_left_to_right from trying to match a
1546         # failed terminal else match
1547         return unless @{$rgroups} > 1;
1548         $rgroups->[-2]->[2] = -1;
1549         return;
1550     }
1551
1552     sub sweep_top_down {
1553         my ( $self, $rlines, $group_level ) = @_;
1554
1555         # Partition the set of lines into final alignment subgroups
1556         # and store the alignments with the lines.
1557
1558         # The alignment subgroups we are making here are groups of consecutive
1559         # lines which have (1) identical alignment tokens and (2) do not
1560         # exceed the allowable maximum line length.  A later sweep from
1561         # left-to-right ('sweep_lr') will handle additional alignments.
1562
1563         # transfer args to closure variables
1564         $rall_lines = $rlines;
1565         $grp_level  = $group_level;
1566         $rgroups    = [];
1567         initialize_for_new_rgroup();
1568         return unless @{$rlines};    # shouldn't happen
1569
1570         # Unset the _end_group flag for the last line if it it set because it
1571         # is not needed and can causes problems for -lp formatting
1572         $rall_lines->[-1]->set_end_group(0);
1573
1574         # Loop over all lines ...
1575         my $jline = -1;
1576         foreach my $new_line ( @{$rall_lines} ) {
1577             $jline++;
1578
1579             # Start a new subgroup if necessary
1580             if ( !$group_line_count ) {
1581                 add_to_rgroup($jline);
1582                 if ( $new_line->get_end_group() ) {
1583                     end_rgroup(-1);
1584                 }
1585                 next;
1586             }
1587
1588             my $j_terminal_match = $new_line->get_j_terminal_match();
1589             my ( $jbeg, $jend ) = get_rgroup_jrange();
1590             if ( !defined($jbeg) ) {
1591
1592                 # safety check, shouldn't happen
1593                 $self->warning(<<EOM);
1594 Program bug detected in Perl::Tidy::VerticalAligner sub sweep_top_down
1595 undefined index for group line count $group_line_count
1596 EOM
1597                 $jbeg = $jline;
1598             }
1599             my $base_line = $rall_lines->[$jbeg];
1600
1601             # Initialize a global flag saying if the last line of the group
1602             # should match end of group and also terminate the group.  There
1603             # should be no returns between here and where the flag is handled
1604             # at the bottom.
1605             my $col_matching_terminal = 0;
1606             if ( defined($j_terminal_match) ) {
1607
1608                 # remember the column of the terminal ? or { to match with
1609                 $col_matching_terminal =
1610                   $base_line->get_column($j_terminal_match);
1611
1612                 # Ignore an undefined value as a defensive step; shouldn't
1613                 # normally happen.
1614                 $col_matching_terminal = 0
1615                   unless defined($col_matching_terminal);
1616             }
1617
1618             # -------------------------------------------------------------
1619             # Allow hanging side comment to join current group, if any.  The
1620             # only advantage is to keep the other tokens in the same group. For
1621             # example, this would make the '=' align here:
1622             #  $ax         = 1;           # side comment
1623             #                             # hanging side comment
1624             #  $boondoggle = 5;           # side comment
1625             #  $beetle     = 5;           # side comment
1626
1627             # here is another example..
1628
1629             #  _rtoc_name_count   => {},                   # hash to track ..
1630             #  _rpackage_stack    => [],                   # stack to check ..
1631             #                                              # name changes
1632             #  _rlast_level       => \$last_level,         # brace indentation
1633             #
1634             #
1635             # If this were not desired, the next step could be skipped.
1636             # -------------------------------------------------------------
1637             if ( $new_line->get_is_hanging_side_comment() ) {
1638                 join_hanging_comment( $new_line, $base_line );
1639             }
1640
1641             # If this line has no matching tokens, then flush out the lines
1642             # BEFORE this line unless both it and the previous line have side
1643             # comments.  This prevents this line from pushing side coments out
1644             # to the right.
1645             elsif ( $new_line->get_jmax() == 1 ) {
1646
1647                 # There are no matching tokens, so now check side comments.
1648                 # Programming note: accessing arrays with index -1 is
1649                 # risky in Perl, but we have verified there is at least one
1650                 # line in the group and that there is at least one field.
1651                 my $prev_comment =
1652                   $rall_lines->[ $jline - 1 ]->get_rfields()->[-1];
1653                 my $side_comment = $new_line->get_rfields()->[-1];
1654                 end_rgroup(-1) unless ( $side_comment && $prev_comment );
1655             }
1656
1657             # See if the new line matches and fits the current group,
1658             # if it still exists. Flush the current group if not.
1659             my $match_code;
1660             if ($group_line_count) {
1661                 ( $match_code, my $imax_align ) =
1662                   $self->check_match( $new_line, $base_line,
1663                     $rall_lines->[ $jline - 1 ] );
1664                 if ( $match_code != 2 ) { end_rgroup($imax_align) }
1665             }
1666
1667             # Store the new line
1668             add_to_rgroup($jline);
1669
1670             if ( defined($j_terminal_match) ) {
1671
1672                 # Decide if we should fix a terminal match. We can either:
1673                 # 1. fix it and prevent the sweep_lr from changing it, or
1674                 # 2. leave it alone and let sweep_lr try to fix it.
1675
1676                 # The current logic is to fix it if:
1677                 # -it has not joined to previous lines,
1678                 # -and either the previous subgroup has just 1 line, or
1679                 # -this line matched but did not fit (so sweep won't work)
1680                 my $fixit;
1681                 if ( $group_line_count == 1 ) {
1682                     $fixit ||= $match_code;
1683                     if ( !$fixit ) {
1684                         if ( @{$rgroups} > 1 ) {
1685                             my ( $jbegx, $jendx ) = @{ $rgroups->[-2] };
1686                             my $nlines = $jendx - $jbegx + 1;
1687                             $fixit ||= $nlines <= 1;
1688                         }
1689                     }
1690                 }
1691
1692                 if ($fixit) {
1693                     $base_line = $new_line;
1694                     my $col_now = $base_line->get_column($j_terminal_match);
1695
1696                     # Ignore an undefined value as a defensive step; shouldn't
1697                     # normally happen.
1698                     $col_now = 0 unless defined($col_now);
1699
1700                     my $pad = $col_matching_terminal - $col_now;
1701                     my $padding_available =
1702                       $base_line->get_available_space_on_right();
1703                     if ( $col_now && $pad > 0 && $pad <= $padding_available ) {
1704                         $base_line->increase_field_width( $j_terminal_match,
1705                             $pad );
1706                     }
1707
1708                     # do not let sweep_left_to_right change an isolated 'else'
1709                     if ( !$new_line->get_is_terminal_ternary() ) {
1710                         block_penultimate_match();
1711                     }
1712                 }
1713                 end_rgroup(-1);
1714             }
1715
1716             # end the group if we know we cannot match next line.
1717             elsif ( $new_line->get_end_group() ) {
1718                 end_rgroup(-1);
1719             }
1720         } ## end loop over lines
1721
1722         end_rgroup(-1);
1723         return ($rgroups);
1724     }
1725 }
1726
1727 sub two_line_pad {
1728
1729     my ( $line_m, $line, $imax_min ) = @_;
1730
1731     # Given:
1732     #  two isolated (list) lines
1733     #  imax_min = number of common alignment tokens
1734     # Return:
1735     #  $pad_max = maximum suggested pad distnce
1736     #           = 0 if alignment not recommended
1737     # Note that this is only for two lines which do not have alignment tokens
1738     # in common with any other lines.  It is intended for lists, but it might
1739     # also be used for two non-list lines with a common leading '='.
1740
1741     # Allow alignment if the difference in the two unpadded line lengths
1742     # is not more than either line length.  The idea is to avoid
1743     # aligning lines with very different field lengths, like these two:
1744
1745     #   [
1746     #       'VARCHAR', DBI::SQL_VARCHAR, undef, "'", "'", undef, 0, 1,
1747     #       1, 0, 0, 0, undef, 0, 0
1748     #   ];
1749     my $rfield_lengths   = $line->get_rfield_lengths();
1750     my $rfield_lengths_m = $line_m->get_rfield_lengths();
1751
1752     # Safety check - shouldn't happen
1753     return 0
1754       unless $imax_min < @{$rfield_lengths} && $imax_min < @{$rfield_lengths_m};
1755
1756     my $lensum_m = 0;
1757     my $lensum   = 0;
1758     for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
1759         $lensum_m += $rfield_lengths_m->[$i];
1760         $lensum   += $rfield_lengths->[$i];
1761     }
1762
1763     my ( $lenmin, $lenmax ) =
1764       $lensum >= $lensum_m ? ( $lensum_m, $lensum ) : ( $lensum, $lensum_m );
1765
1766     my $patterns_match;
1767     if ( $line_m->get_list_type() && $line->get_list_type() ) {
1768         $patterns_match = 1;
1769         my $rpatterns_m = $line_m->get_rpatterns();
1770         my $rpatterns   = $line->get_rpatterns();
1771         for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
1772             my $pat   = $rpatterns->[$i];
1773             my $pat_m = $rpatterns_m->[$i];
1774             if ( $pat ne $pat_m ) { $patterns_match = 0; last }
1775         }
1776     }
1777
1778     my $pad_max = $lenmax;
1779     if ( !$patterns_match && $lenmax > 2 * $lenmin ) { $pad_max = 0 }
1780
1781     return $pad_max;
1782 }
1783
1784 sub sweep_left_to_right {
1785
1786     my ( $rlines, $rgroups, $group_level ) = @_;
1787
1788     # So far we have divided the lines into groups having an equal number of
1789     # identical alignments.  Here we are going to look for common leading
1790     # alignments between the different groups and align them when possible.
1791     # For example, the three lines below are in three groups because each line
1792     # has a different number of commas.  In this routine we will sweep from
1793     # left to right, aligning the leading commas as we go, but stopping if we
1794     # hit the line length limit.
1795
1796     #  my ( $num, $numi, $numj,  $xyza, $ka,   $xyzb, $kb, $aff, $error );
1797     #  my ( $i,   $j,    $error, $aff,  $asum, $avec );
1798     #  my ( $km,  $area, $varea );
1799
1800     # nothing to do if just one group
1801     my $ng_max = @{$rgroups} - 1;
1802     return unless ( $ng_max > 0 );
1803
1804     ############################################################################
1805     # Step 1: Loop over groups to find all common leading alignment tokens
1806     ############################################################################
1807
1808     my $line;
1809     my $rtokens;
1810     my $imax;     # index of maximum non-side-comment alignment token
1811     my $istop;    # an optional stopping index
1812     my $jbeg;     # starting line index
1813     my $jend;     # ending line index
1814
1815     my $line_m;
1816     my $rtokens_m;
1817     my $imax_m;
1818     my $istop_m;
1819     my $jbeg_m;
1820     my $jend_m;
1821
1822     my $istop_mm;
1823
1824     # Look at neighboring pairs of groups and form a simple list
1825     # of all common leading alignment tokens. Foreach such match we
1826     # store [$i, $ng], where
1827     #  $i = index of the token in the line (0,1,...)
1828     #  $ng is the second of the two groups with this common token
1829     my @icommon;
1830
1831     # Hash to hold the maximum alignment change for any group
1832     my %max_move;
1833
1834     # a small number of columns
1835     my $short_pad = 4;
1836
1837     my $ng = -1;
1838     foreach my $item ( @{$rgroups} ) {
1839         $ng++;
1840
1841         $istop_mm = $istop_m;
1842
1843         # save _m values of previous group
1844         $line_m    = $line;
1845         $rtokens_m = $rtokens;
1846         $imax_m    = $imax;
1847         $istop_m   = $istop;
1848         $jbeg_m    = $jbeg;
1849         $jend_m    = $jend;
1850
1851         # Get values for this group. Note that we just have to use values for
1852         # one of the lines of the group since all members have the same
1853         # alignments.
1854         ( $jbeg, $jend, $istop ) = @{$item};
1855
1856         $line    = $rlines->[$jbeg];
1857         $rtokens = $line->get_rtokens();
1858         $imax    = $line->get_jmax() - 2;
1859         $istop   = -1 unless ( defined($istop) );
1860         $istop   = $imax if ( $istop > $imax );
1861
1862         # Initialize on first group
1863         next if ( $ng == 0 );
1864
1865         # Use the minimum index limit of the two groups
1866         my $imax_min = $imax > $imax_m ? $imax_m : $imax;
1867
1868         # Also impose a limit if given.
1869         if ( $istop_m < $imax_min ) {
1870             $imax_min = $istop_m;
1871         }
1872
1873         # Special treatment of two one-line groups isolated from other lines,
1874         # unless they form a simple list or a terminal match.  Otherwise the
1875         # alignment can look strange in some cases.
1876         my $list_type = $rlines->[$jbeg]->get_list_type();
1877         if (
1878                $jend == $jbeg
1879             && $jend_m == $jbeg_m
1880             && ( $ng == 1 || $istop_mm < 0 )
1881             && ( $ng == $ng_max || $istop < 0 )
1882             && !$line->get_j_terminal_match()
1883
1884             # Only do this for imperfect matches. This is normally true except
1885             # when two perfect matches cannot form a group because the line
1886             # length limit would be exceeded. In that case we can still try
1887             # to match as many alignments as possible.
1888             && ( $imax != $imax_m || $istop_m != $imax_m )
1889           )
1890         {
1891
1892             # We will just align assignments and simple lists
1893             next unless ( $imax_min >= 0 );
1894             next
1895               unless ( $rtokens->[0] =~ /^=\d/
1896                 || $list_type );
1897
1898             # In this case we will limit padding to a short distance.  This
1899             # is a compromise to keep some vertical alignment but prevent large
1900             # gaps, which do not look good for just two lines.
1901             my $pad_max =
1902               two_line_pad( $rlines->[$jbeg], $rlines->[$jbeg_m], $imax_min );
1903             next unless ($pad_max);
1904             my $ng_m = $ng - 1;
1905             $max_move{"$ng_m"} = $pad_max;
1906             $max_move{"$ng"}   = $pad_max;
1907         }
1908
1909         # Loop to find all common leading tokens.
1910         if ( $imax_min >= 0 ) {
1911             foreach my $i ( 0 .. $imax_min ) {
1912                 my $tok   = $rtokens->[$i];
1913                 my $tok_m = $rtokens_m->[$i];
1914                 last if ( $tok ne $tok_m );
1915                 push @icommon, [ $i, $ng, $tok ];
1916             }
1917         }
1918     }
1919     return unless @icommon;
1920
1921     ###########################################################
1922     # Step 2: Reorder and consolidate the list into a task list
1923     ###########################################################
1924
1925     # We have to work first from lowest token index to highest, then by group,
1926     # sort our list first on token index then group number
1927     @icommon = sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @icommon;
1928
1929     # Make a task list of the form
1930     #   [$i, ng_beg, $ng_end, $tok], ..
1931     # where
1932     #   $i is the index of the token to be aligned
1933     #   $ng_beg..$ng_end is the group range for this action
1934     my @todo;
1935     my ( $i, $ng_end, $tok );
1936     foreach my $item (@icommon) {
1937         my $ng_last = $ng_end;
1938         my $i_last  = $i;
1939         ( $i, $ng_end, $tok ) = @{$item};
1940         my $ng_beg = $ng_end - 1;
1941         if ( defined($ng_last) && $ng_beg == $ng_last && $i == $i_last ) {
1942             my $var = pop(@todo);
1943             $ng_beg = $var->[1];
1944         }
1945         my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
1946         push @todo, [ $i, $ng_beg, $ng_end, $raw_tok, $lev ];
1947     }
1948
1949     ###############################
1950     # Step 3: Execute the task list
1951     ###############################
1952     do_left_to_right_sweep( $rlines, $rgroups, \@todo, \%max_move, $short_pad,
1953         $group_level );
1954     return;
1955 }
1956
1957 {    ## closure for sub do_left_to_right_sweep
1958
1959     my %is_good_alignment_token;
1960
1961     BEGIN {
1962
1963         # One of the most difficult aspects of vertical alignment is knowing
1964         # when not to align.  Alignment can go from looking very nice to very
1965         # bad when overdone.  In the sweep algorithm there are two special
1966         # cases where we may need to limit padding to a '$short_pad' distance
1967         # to avoid some very ugly formatting:
1968
1969         # 1. Two isolated lines with partial alignment
1970         # 2. A 'tail-wag-dog' situation, in which a single terminal
1971         #    line with partial alignment could cause a significant pad
1972         #    increase in many previous lines if allowed to join the alignment.
1973
1974         # For most alignment tokens, we will allow only a small pad to be
1975         # introduced (the hardwired $short_pad variable) . But for some 'good'
1976         # alignments we can be less restrictive.
1977
1978         # These are 'good' alignments, which are allowed more padding:
1979         my @q = qw(
1980           => = ? if unless or || {
1981         );
1982         push @q, ',';
1983         @is_good_alignment_token{@q} = (0) x scalar(@q);
1984
1985         # Promote a few of these to 'best', with essentially no pad limit:
1986         $is_good_alignment_token{'='}      = 1;
1987         $is_good_alignment_token{'if'}     = 1;
1988         $is_good_alignment_token{'unless'} = 1;
1989         $is_good_alignment_token{'=>'}     = 1
1990
1991           # Note the hash values are set so that:
1992           #         if ($is_good_alignment_token{$raw_tok}) => best
1993           # if defined ($is_good_alignment_token{$raw_tok}) => good or best
1994
1995     }
1996
1997     sub do_left_to_right_sweep {
1998         my ( $rlines, $rgroups, $rtodo, $rmax_move, $short_pad, $group_level )
1999           = @_;
2000
2001         # $blocking_level[$nj is the level at a match failure between groups
2002         # $ng-1 and $ng
2003         my @blocking_level;
2004         my $group_list_type = $rlines->[0]->get_list_type();
2005
2006         my $move_to_common_column = sub {
2007
2008             # Move the alignment column of token $itok to $col_want for a
2009             # sequence of groups.
2010             my ( $ngb, $nge, $itok, $col_want, $raw_tok ) = @_;
2011             return unless ( defined($ngb) && $nge > $ngb );
2012             foreach my $ng ( $ngb .. $nge ) {
2013
2014                 my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
2015                 my $line  = $rlines->[$jbeg];
2016                 my $col   = $line->get_column($itok);
2017                 my $avail = $line->get_available_space_on_right();
2018                 my $move  = $col_want - $col;
2019                 if ( $move > 0 ) {
2020
2021                     # limit padding increase in isolated two lines
2022                     next
2023                       if ( defined( $rmax_move->{$ng} )
2024                         && $move > $rmax_move->{$ng}
2025                         && !$is_good_alignment_token{$raw_tok} );
2026
2027                     $line->increase_field_width( $itok, $move );
2028                 }
2029                 elsif ( $move < 0 ) {
2030
2031                     # spot to take special action on failure to move
2032                 }
2033             }
2034         };
2035
2036         foreach my $task ( @{$rtodo} ) {
2037             my ( $itok, $ng_beg, $ng_end, $raw_tok, $lev ) = @{$task};
2038
2039             # Nothing to do for a single group
2040             next unless ( $ng_end > $ng_beg );
2041
2042             my $ng_first;  # index of the first group of a continuous sequence
2043             my $col_want;  # the common alignment column of a sequence of groups
2044             my $col_limit; # maximum column before bumping into max line length
2045             my $line_count_ng_m = 0;
2046             my $jmax_m;
2047             my $it_stop_m;
2048
2049             # Loop over the groups
2050             # 'ix_' = index in the array of lines
2051             # 'ng_' = index in the array of groups
2052             # 'it_' = index in the array of tokens
2053             my $ix_min      = $rgroups->[$ng_beg]->[0];
2054             my $ix_max      = $rgroups->[$ng_end]->[1];
2055             my $lines_total = $ix_max - $ix_min + 1;
2056             foreach my $ng ( $ng_beg .. $ng_end ) {
2057                 my ( $ix_beg, $ix_end, $it_stop ) = @{ $rgroups->[$ng] };
2058                 my $line_count_ng = $ix_end - $ix_beg + 1;
2059
2060                 # Important: note that since all lines in a group have a common
2061                 # alignments object, we just have to work on one of the lines
2062                 # (the first line).  All of the rest will be changed
2063                 # automatically.
2064                 my $line = $rlines->[$ix_beg];
2065                 my $jmax = $line->get_jmax();
2066
2067                 # the maximum space without exceeding the line length:
2068                 my $avail   = $line->get_available_space_on_right();
2069                 my $col     = $line->get_column($itok);
2070                 my $col_max = $col + $avail;
2071
2072                 # Initialize on first group
2073                 if ( !defined($col_want) ) {
2074                     $ng_first        = $ng;
2075                     $col_want        = $col;
2076                     $col_limit       = $col_max;
2077                     $line_count_ng_m = $line_count_ng;
2078                     $jmax_m          = $jmax;
2079                     $it_stop_m       = $it_stop;
2080                     next;
2081                 }
2082
2083                 # RULE: Throw a blocking flag upon encountering a token level
2084                 # different from the level of the first blocking token.  For
2085                 # example, in the following example, if the = matches get
2086                 # blocked between two groups as shown, then we want to start
2087                 # blocking matches at the commas, which are at deeper level, so
2088                 # that we do not get the big gaps shown here:
2089
2090                 #  my $unknown3 = pack( "v",          -2 );
2091                 #  my $unknown4 = pack( "v",          0x09 );
2092                 #  my $unknown5 = pack( "VVV",        0x06, 0x00, 0x00 );
2093                 #  my $num_bbd_blocks  = pack( "V",   $num_lists );
2094                 #  my $root_startblock = pack( "V",   $root_start );
2095                 #  my $unknown6        = pack( "VV",  0x00, 0x1000 );
2096
2097                 # On the other hand, it is okay to keep matching at the same
2098                 # level such as in a simple list of commas and/or fat arrors.
2099
2100                 my $is_blocked = defined( $blocking_level[$ng] )
2101                   && $lev > $blocking_level[$ng];
2102
2103                 # TAIL-WAG-DOG RULE: prevent a 'tail-wag-dog' syndrom, meaning:
2104                 # Do not let one or two lines with a **different number of
2105                 # alignments** open up a big gap in a large block.  For
2106                 # example, we will prevent something like this, where the first
2107                 # line prys open the rest:
2108
2109             #  $worksheet->write( "B7", "http://www.perl.com", undef, $format );
2110             #  $worksheet->write( "C7", "",                    $format );
2111             #  $worksheet->write( "D7", "",                    $format );
2112             #  $worksheet->write( "D8", "",                    $format );
2113             #  $worksheet->write( "D8", "",                    $format );
2114
2115                 # We should exclude from consideration two groups which are
2116                 # effectively the same but separated because one does not
2117                 # fit in the maximum allowed line length.
2118                 my $is_same_group =
2119                   $jmax == $jmax_m && $it_stop_m == $jmax_m - 2;
2120
2121                 my $lines_above = $ix_beg - $ix_min;
2122                 my $lines_below = $lines_total - $lines_above;
2123
2124                 # Increase the tolerable gap for certain favorable factors
2125                 my $factor    = 1;
2126                 my $top_level = $lev == $group_level;
2127
2128                 # Align best top level alignment tokens like '=', 'if', ...
2129                 # A factor of 10 allows a gap of up to 40 spaces
2130                 if ( $top_level && $is_good_alignment_token{$raw_tok} ) {
2131                     $factor = 10;
2132                 }
2133
2134                 # Otherwise allow some minimal padding of good alignments
2135                 elsif (
2136
2137                     defined( $is_good_alignment_token{$raw_tok} )
2138
2139                     # We have to be careful if there are just 2 lines.  This
2140                     # two-line factor allows large gaps only for 2 lines which
2141                     # are simple lists with fewer items on the second line. It
2142                     # gives results similar to previous versions of perltidy.
2143                     && (   $lines_total > 2
2144                         || $group_list_type && $jmax < $jmax_m && $top_level )
2145                   )
2146                 {
2147                     $factor += 1;
2148                     if ($top_level) {
2149                         $factor += 1;
2150                     }
2151                 }
2152
2153                 my $is_big_gap;
2154                 if ( !$is_same_group ) {
2155                     $is_big_gap ||=
2156                       (      $lines_above == 1
2157                           || $lines_above == 2 && $lines_below >= 4 )
2158                       && $col_want > $col + $short_pad * $factor;
2159                     $is_big_gap ||=
2160                       (      $lines_below == 1
2161                           || $lines_below == 2 && $lines_above >= 4 )
2162                       && $col > $col_want + $short_pad * $factor;
2163                 }
2164
2165                 # if match is limited by gap size, stop aligning at this level
2166                 if ($is_big_gap) {
2167                     $blocking_level[$ng] = $lev - 1;
2168                 }
2169
2170                 # quit and restart if it cannot join this batch
2171                 if (   $col_want > $col_max
2172                     || $col > $col_limit
2173                     || $is_big_gap
2174                     || $is_blocked )
2175                 {
2176
2177                     # remember the level of the first blocking token
2178                     if ( !defined( $blocking_level[$ng] ) ) {
2179                         $blocking_level[$ng] = $lev;
2180                     }
2181
2182                     $move_to_common_column->(
2183                         $ng_first, $ng - 1, $itok, $col_want, $raw_tok
2184                     );
2185                     $ng_first        = $ng;
2186                     $col_want        = $col;
2187                     $col_limit       = $col_max;
2188                     $line_count_ng_m = $line_count_ng;
2189                     $jmax_m          = $jmax;
2190                     $it_stop_m       = $it_stop;
2191                     next;
2192                 }
2193
2194                 $line_count_ng_m += $line_count_ng;
2195
2196                 # update the common column and limit
2197                 if ( $col > $col_want )      { $col_want  = $col }
2198                 if ( $col_max < $col_limit ) { $col_limit = $col_max }
2199
2200             } ## end loop over groups
2201
2202             if ( $ng_end > $ng_first ) {
2203                 $move_to_common_column->(
2204                     $ng_first, $ng_end, $itok, $col_want, $raw_tok
2205                 );
2206             } ## end loop over groups for one task
2207         } ## end loop over tasks
2208
2209         return;
2210     }
2211 }
2212
2213 sub delete_selected_tokens {
2214
2215     my ( $line_obj, $ridel ) = @_;
2216
2217     # $line_obj    is the line to be modified
2218     # $ridel       is a ref to list of indexes to be deleted
2219
2220     # remove an unused alignment token(s) to improve alignment chances
2221
2222     return unless ( defined($line_obj) && defined($ridel) && @{$ridel} );
2223
2224     my $jmax_old           = $line_obj->get_jmax();
2225     my $rfields_old        = $line_obj->get_rfields();
2226     my $rfield_lengths_old = $line_obj->get_rfield_lengths();
2227     my $rpatterns_old      = $line_obj->get_rpatterns();
2228     my $rtokens_old        = $line_obj->get_rtokens();
2229     my $j_terminal_match   = $line_obj->get_j_terminal_match();
2230
2231     use constant EXPLAIN_DELETE_SELECTED => 0;
2232
2233     local $" = '> <';
2234     EXPLAIN_DELETE_SELECTED && print <<EOM;
2235 delete indexes: <@{$ridel}>
2236 old jmax: $jmax_old
2237 old tokens: <@{$rtokens_old}>
2238 old patterns: <@{$rpatterns_old}>
2239 old fields: <@{$rfields_old}>
2240 old field_lengths: <@{$rfield_lengths_old}>
2241 EOM
2242
2243     my $rfields_new        = [];
2244     my $rpatterns_new      = [];
2245     my $rtokens_new        = [];
2246     my $rfield_lengths_new = [];
2247
2248     # Convert deletion list to a hash to allow any order, multiple entries,
2249     # and avoid problems with index values out of range
2250     my %delete_me;
2251     @delete_me{ @{$ridel} } = (1) x scalar( @{$ridel} );
2252
2253     my $pattern      = $rpatterns_old->[0];
2254     my $field        = $rfields_old->[0];
2255     my $field_length = $rfield_lengths_old->[0];
2256     push @{$rfields_new},        $field;
2257     push @{$rfield_lengths_new}, $field_length;
2258     push @{$rpatterns_new},      $pattern;
2259
2260     # Loop to either copy items or concatenate fields and patterns
2261     my $jmin_del;
2262     for ( my $j = 0 ; $j < $jmax_old ; $j++ ) {
2263         my $token        = $rtokens_old->[$j];
2264         my $field        = $rfields_old->[ $j + 1 ];
2265         my $field_length = $rfield_lengths_old->[ $j + 1 ];
2266         my $pattern      = $rpatterns_old->[ $j + 1 ];
2267         if ( !$delete_me{$j} ) {
2268             push @{$rtokens_new},        $token;
2269             push @{$rfields_new},        $field;
2270             push @{$rpatterns_new},      $pattern;
2271             push @{$rfield_lengths_new}, $field_length;
2272         }
2273         else {
2274             if ( !defined($jmin_del) ) { $jmin_del = $j }
2275             $rfields_new->[-1] .= $field;
2276             $rfield_lengths_new->[-1] += $field_length;
2277             $rpatterns_new->[-1] .= $pattern;
2278         }
2279     }
2280
2281     # ----- x ------ x ------ x ------
2282     #t      0        1        2        <- token indexing
2283     #f   0      1        2        3    <- field and pattern
2284
2285     my $jmax_new = @{$rfields_new} - 1;
2286     $line_obj->set_rtokens($rtokens_new);
2287     $line_obj->set_rpatterns($rpatterns_new);
2288     $line_obj->set_rfields($rfields_new);
2289     $line_obj->set_rfield_lengths($rfield_lengths_new);
2290     $line_obj->set_jmax($jmax_new);
2291
2292     # The value of j_terminal_match will be incorrect if we delete tokens prior
2293     # to it. We will have to give up on aligning the terminal tokens if this
2294     # happens.
2295     if ( defined($j_terminal_match) && $jmin_del <= $j_terminal_match ) {
2296         $line_obj->set_j_terminal_match(undef);
2297     }
2298
2299     # update list type -
2300     if ( $line_obj->get_list_seqno() ) {
2301
2302         ## This works, but for efficiency see if we need to make a change:
2303         ## decide_if_list($line_obj);
2304
2305         # An existing list will still be a list but with possibly different
2306         # leading token
2307         my $old_list_type = $line_obj->get_list_type();
2308         my $new_list_type = "";
2309         if ( $rtokens_new->[0] =~ /^(=>|,)/ ) {
2310             $new_list_type = $rtokens_new->[0];
2311         }
2312         if ( !$old_list_type || $old_list_type ne $new_list_type ) {
2313             decide_if_list($line_obj);
2314         }
2315     }
2316
2317     EXPLAIN_DELETE_SELECTED && print <<EOM;
2318
2319 new jmax: $jmax_new
2320 new tokens: <@{$rtokens_new}>
2321 new patterns: <@{$rpatterns_new}>
2322 new fields: <@{$rfields_new}>
2323 EOM
2324     return;
2325 }
2326
2327 {    ## closure for sub decode_alignment_token
2328
2329     # This routine is called repeatedly for each token, so it needs to be
2330     # efficient.  We can speed things up by remembering the inputs and outputs
2331     # in a hash.
2332     my %decoded_token;
2333
2334     sub initialize_decode {
2335
2336         # We will re-initialize the hash for each file. Otherwise, there is
2337         # a danger that the hash can become arbitrarily large if a very large
2338         # number of files is processed at once.
2339         %decoded_token = ();
2340         return;
2341     }
2342
2343     sub decode_alignment_token {
2344
2345         # Unpack the values packed in an alignment token
2346         #
2347         # Usage:
2348         #        my ( $raw_tok, $lev, $tag, $tok_count ) =
2349         #          decode_alignment_token($token);
2350
2351         # Alignment tokens have a trailing decimal level and optional tag (for
2352         # commas):
2353         # For example, the first comma in the following line
2354         #     sub banner  { crlf; report( shift, '/', shift ); crlf }
2355         # is decorated as follows:
2356         #    ,2+report-6  => (tok,lev,tag) =qw( ,   2   +report-6)
2357
2358         # An optional token count may be appended with a leading dot.
2359         # Currently this is only done for '=' tokens but this could change.
2360         # For example, consider the following line:
2361         #   $nport   = $port = shift || $name;
2362         # The first '=' may either be '=0' or '=0.1' [level 0, first equals]
2363         # The second '=' will be '=0.2' [level 0, second equals]
2364         my ($tok) = @_;
2365
2366         if ( defined( $decoded_token{$tok} ) ) {
2367             return @{ $decoded_token{$tok} };
2368         }
2369
2370         my ( $raw_tok, $lev, $tag, $tok_count ) = ( $tok, 0, "", 1 );
2371         if ( $tok =~ /^(\D+)(\d+)([^\.]*)(\.(\d+))?$/ ) {
2372             $raw_tok   = $1;
2373             $lev       = $2;
2374             $tag       = $3 if ($3);
2375             $tok_count = $5 if ($5);
2376         }
2377         my @vals = ( $raw_tok, $lev, $tag, $tok_count );
2378         $decoded_token{$tok} = \@vals;
2379         return @vals;
2380     }
2381 }
2382
2383 {    ## closure for sub delete_unmatched_tokens
2384
2385     my %is_assignment;
2386     my %keep_after_deleted_assignment;
2387
2388     BEGIN {
2389         my @q;
2390
2391         @q = qw(
2392           = **= += *= &= <<= &&=
2393           -= /= |= >>= ||= //=
2394           .= %= ^=
2395           x=
2396         );
2397         @is_assignment{@q} = (1) x scalar(@q);
2398
2399         # These tokens may be kept following an = deletion
2400         @q = qw(
2401           if unless or ||
2402         );
2403         @keep_after_deleted_assignment{@q} = (1) x scalar(@q);
2404
2405     }
2406
2407     # This flag is for testing only and should normally be zero.
2408     use constant TEST_DELETE_NULL => 0;
2409
2410     sub delete_unmatched_tokens {
2411         my ( $rlines, $group_level ) = @_;
2412
2413         # This is a preliminary step in vertical alignment in which we remove
2414         # as many obviously un-needed alignment tokens as possible.  This will
2415         # prevent them from interfering with the final alignment.
2416
2417         # These are the return values
2418         my $max_lev_diff     = 0;    # used to avoid a call to prune_tree
2419         my $saw_side_comment = 0;    # used to avoid a call for side comments
2420
2421         # Handle no lines -- shouldn't happen
2422         return unless @{$rlines};
2423
2424         # Handle a single line
2425         if ( @{$rlines} == 1 ) {
2426             my $line   = $rlines->[0];
2427             my $jmax   = $line->get_jmax();
2428             my $length = $line->get_rfield_lengths()->[$jmax];
2429             $saw_side_comment = $length > 0;
2430             return ( $max_lev_diff, $saw_side_comment );
2431         }
2432
2433         my $has_terminal_match = $rlines->[-1]->get_j_terminal_match();
2434
2435         # ignore hanging side comments in these operations
2436         my @filtered   = grep { !$_->get_is_hanging_side_comment() } @{$rlines};
2437         my $rnew_lines = \@filtered;
2438
2439         $saw_side_comment = @filtered != @{$rlines};
2440         $max_lev_diff     = 0;
2441
2442         # nothing to do if all lines were hanging side comments
2443         my $jmax = @{$rnew_lines} - 1;
2444         return ( $max_lev_diff, $saw_side_comment ) unless ( $jmax >= 0 );
2445
2446         my @equals_info;
2447         my @line_info;
2448         my %is_good_tok;
2449
2450         # create a hash of tokens for each line
2451         my $rline_hashes = [];
2452         foreach my $line ( @{$rnew_lines} ) {
2453             my $rhash     = {};
2454             my $rtokens   = $line->get_rtokens();
2455             my $rpatterns = $line->get_rpatterns();
2456             my $i         = 0;
2457             my ( $i_eq, $tok_eq, $pat_eq );
2458             my ( $lev_min, $lev_max );
2459             foreach my $tok ( @{$rtokens} ) {
2460                 my ( $raw_tok, $lev, $tag, $tok_count ) =
2461                   decode_alignment_token($tok);
2462
2463                 if ( $tok ne '#' ) {
2464                     if ( !defined($lev_min) ) {
2465                         $lev_min = $lev;
2466                         $lev_max = $lev;
2467                     }
2468                     else {
2469                         if ( $lev < $lev_min ) { $lev_min = $lev }
2470                         if ( $lev > $lev_max ) { $lev_max = $lev }
2471                     }
2472                 }
2473                 else {
2474                     if ( !$saw_side_comment ) {
2475                         my $length = $line->get_rfield_lengths()->[ $i + 1 ];
2476                         $saw_side_comment ||= $length;
2477                     }
2478                 }
2479
2480                 # Possible future upgrade: for multiple matches,
2481                 # record [$i1, $i2, ..] instead of $i
2482                 $rhash->{$tok} =
2483                   [ $i, undef, undef, $raw_tok, $lev, $tag, $tok_count ];
2484
2485                 # remember the first equals at line level
2486                 if ( !defined($i_eq) && $raw_tok eq '=' ) {
2487
2488                     if ( $lev eq $group_level ) {
2489                         $i_eq   = $i;
2490                         $tok_eq = $tok;
2491                         $pat_eq = $rpatterns->[$i];
2492                     }
2493                 }
2494                 $i++;
2495             }
2496             push @{$rline_hashes}, $rhash;
2497             push @equals_info, [ $i_eq, $tok_eq, $pat_eq ];
2498             push @line_info, [ $lev_min, $lev_max ];
2499             if ( defined($lev_min) ) {
2500                 my $lev_diff = $lev_max - $lev_min;
2501                 if ( $lev_diff > $max_lev_diff ) { $max_lev_diff = $lev_diff }
2502             }
2503         }
2504
2505         # compare each line pair and record matches
2506         my $rtok_hash = {};
2507         my $nr        = 0;
2508         for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) {
2509             my $nl = $nr;
2510             $nr = 0;
2511             my $jr      = $jl + 1;
2512             my $rhash_l = $rline_hashes->[$jl];
2513             my $rhash_r = $rline_hashes->[$jr];
2514             my $count   = 0;                      # UNUSED NOW?
2515             my $ntoks   = 0;
2516             foreach my $tok ( keys %{$rhash_l} ) {
2517                 $ntoks++;
2518                 if ( defined( $rhash_r->{$tok} ) ) {
2519                     if ( $tok ne '#' ) { $count++; }
2520                     my $il = $rhash_l->{$tok}->[0];
2521                     my $ir = $rhash_r->{$tok}->[0];
2522                     $rhash_l->{$tok}->[2] = $ir;
2523                     $rhash_r->{$tok}->[1] = $il;
2524                     if ( $tok ne '#' ) {
2525                         push @{ $rtok_hash->{$tok} }, ( $jl, $jr );
2526                         $nr++;
2527                     }
2528                 }
2529             }
2530
2531             # Set a line break if no matching tokens between these lines
2532             # (this is not strictly necessary now but does not hurt)
2533             if ( $nr == 0 && $nl > 0 ) {
2534                 $rnew_lines->[$jl]->set_end_group(1);
2535             }
2536
2537             # Also set a line break if both lines have simple equals but with
2538             # different leading characters in patterns.  This check is similar
2539             # to one in sub check_match, and will prevent sub
2540             # prune_alignment_tree from removing alignments which otherwise
2541             # should be kept. This fix is rarely needed, but it can
2542             # occasionally improve formatting.
2543             # For example:
2544             #     my $name = $this->{Name};
2545             #     $type = $this->ctype($genlooptype) if defined $genlooptype;
2546             #     my $declini = ( $asgnonly ? ""          : "\t$type *" );
2547             #     my $cast    = ( $type     ? "($type *)" : "" );
2548             # The last two lines start with 'my' and will not match the
2549             # previous line starting with $type, so we do not want
2550             # prune_alignment tree to delete their ? : alignments at a deeper
2551             # level.
2552             my ( $i_eq_l, $tok_eq_l, $pat_eq_l ) = @{ $equals_info[$jl] };
2553             my ( $i_eq_r, $tok_eq_r, $pat_eq_r ) = @{ $equals_info[$jr] };
2554             if ( defined($i_eq_l) && defined($i_eq_r) ) {
2555
2556                 # Also, do not align equals across a change in ci level
2557                 my $ci_jump = $rnew_lines->[$jl]->get_ci_level() !=
2558                   $rnew_lines->[$jr]->get_ci_level();
2559
2560                 if (
2561                        $tok_eq_l eq $tok_eq_r
2562                     && $i_eq_l == 0
2563                     && $i_eq_r == 0
2564                     && ( substr( $pat_eq_l, 0, 1 ) ne substr( $pat_eq_r, 0, 1 )
2565                         || $ci_jump )
2566                   )
2567                 {
2568                     $rnew_lines->[$jl]->set_end_group(1);
2569                 }
2570             }
2571         }
2572
2573         # find subgroups
2574         my @subgroups;
2575         push @subgroups, [ 0, $jmax ];
2576         for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) {
2577             if ( $rnew_lines->[$jl]->get_end_group() ) {
2578                 $subgroups[-1]->[1] = $jl;
2579                 push @subgroups, [ $jl + 1, $jmax ];
2580             }
2581         }
2582
2583         # flag to allow skipping pass 2
2584         my $saw_large_group;
2585
2586         ############################################################
2587         # PASS 1 over subgroups to remove unmatched alignment tokens
2588         ############################################################
2589         foreach my $item (@subgroups) {
2590             my ( $jbeg, $jend ) = @{$item};
2591
2592             my $nlines = $jend - $jbeg + 1;
2593
2594             ####################################################
2595             # Look for complete if/elsif/else and ternary blocks
2596             ####################################################
2597
2598             # We are looking for a common '$dividing_token' like these:
2599
2600             #    if    ( $b and $s ) { $p->{'type'} = 'a'; }
2601             #    elsif ($b)          { $p->{'type'} = 'b'; }
2602             #    elsif ($s)          { $p->{'type'} = 's'; }
2603             #    else                { $p->{'type'} = ''; }
2604             #                        ^----------- dividing_token
2605
2606             #   my $severity =
2607             #      !$routine                     ? '[PFX]'
2608             #     : $routine =~ /warn.*_d\z/     ? '[DS]'
2609             #     : $routine =~ /ck_warn/        ? 'W'
2610             #     : $routine =~ /ckWARN\d*reg_d/ ? 'S'
2611             #     : $routine =~ /ckWARN\d*reg/   ? 'W'
2612             #     : $routine =~ /vWARN\d/        ? '[WDS]'
2613             #     :                                '[PFX]';
2614             #                                    ^----------- dividing_token
2615
2616             # Only look for groups which are more than 2 lines long.  Two lines
2617             # can get messed up doing this, probably due to the various
2618             # two-line rules.
2619
2620             my $dividing_token;
2621             my %token_line_count;
2622             if ( $nlines > 2 ) {
2623
2624                 for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
2625                     my %seen;
2626                     my $line    = $rnew_lines->[$jj];
2627                     my $rtokens = $line->get_rtokens();
2628                     foreach my $tok ( @{$rtokens} ) {
2629                         if ( !$seen{$tok} ) {
2630                             $seen{$tok}++;
2631                             $token_line_count{$tok}++;
2632                         }
2633                     }
2634                 }
2635
2636                 foreach my $tok ( keys %token_line_count ) {
2637                     if ( $token_line_count{$tok} == $nlines ) {
2638                         if (   substr( $tok, 0, 1 ) eq '?'
2639                             || substr( $tok, 0, 1 ) eq '{'
2640                             && $tok =~ /^\{\d+if/ )
2641                         {
2642                             $dividing_token = $tok;
2643                             last;
2644                         }
2645                     }
2646                 }
2647             }
2648
2649             #####################################################
2650             # Loop over lines to remove unwanted alignment tokens
2651             #####################################################
2652             for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
2653                 my $line    = $rnew_lines->[$jj];
2654                 my $rtokens = $line->get_rtokens();
2655                 my $rhash   = $rline_hashes->[$jj];
2656                 my $i_eq    = $equals_info[$jj]->[0];
2657                 my @idel;
2658                 my $imax = @{$rtokens} - 2;
2659                 my $delete_above_level;
2660                 my $deleted_assignment_token;
2661
2662                 my $saw_dividing_token = "";
2663                 $saw_large_group ||= $nlines > 2 && $imax > 1;
2664
2665                 # Loop over all alignment tokens
2666                 for ( my $i = 0 ; $i <= $imax ; $i++ ) {
2667                     my $tok = $rtokens->[$i];
2668                     next if ( $tok eq '#' );    # shouldn't happen
2669                     my ( $iii, $il, $ir, $raw_tok, $lev, $tag, $tok_count ) =
2670                       @{ $rhash->{$tok} };
2671
2672                     #######################################################
2673                     # Here is the basic RULE: remove an unmatched alignment
2674                     # which does not occur in the surrounding lines.
2675                     #######################################################
2676                     my $delete_me = !defined($il) && !defined($ir);
2677
2678                     # But now we modify this with exceptions...
2679
2680                     # EXCEPTION 1: If we are in a complete ternary or
2681                     # if/elsif/else group, and this token is not on every line
2682                     # of the group, should we delete it to preserve overall
2683                     # alignment?
2684                     if ($dividing_token) {
2685                         if ( $token_line_count{$tok} >= $nlines ) {
2686                             $saw_dividing_token ||= $tok eq $dividing_token;
2687                         }
2688                         else {
2689
2690                             # For shorter runs, delete toks to save alignment.
2691                             # For longer runs, keep toks after the '{' or '?'
2692                             # to allow sub-alignments within braces.  The
2693                             # number 5 lines is arbitrary but seems to work ok.
2694                             $delete_me ||=
2695                               ( $nlines < 5 || !$saw_dividing_token );
2696                         }
2697                     }
2698
2699                     # EXCEPTION 2: Remove all tokens above a certain level
2700                     # following a previous deletion.  For example, we have to
2701                     # remove tagged higher level alignment tokens following a
2702                     # '=>' deletion because the tags of higher level tokens
2703                     # will now be incorrect. For example, this will prevent
2704                     # aligning commas as follows after deleting the second '=>'
2705                     #    $w->insert(
2706                     #   ListBox => origin => [ 270, 160 ],
2707                     #   size    => [ 200,           55 ],
2708                     #    );
2709                     if ( defined($delete_above_level) ) {
2710                         if ( $lev > $delete_above_level ) {
2711                             $delete_me ||= 1;    #$tag;
2712                         }
2713                         else { $delete_above_level = undef }
2714                     }
2715
2716                     # EXCEPTION 3: Remove all but certain tokens after an
2717                     # assignment deletion.
2718                     if (
2719                         $deleted_assignment_token
2720                         && ( $lev > $group_level
2721                             || !$keep_after_deleted_assignment{$raw_tok} )
2722                       )
2723                     {
2724                         $delete_me ||= 1;
2725                     }
2726
2727                     # EXCEPTION 4: Do not touch the first line of a 2 line
2728                     # terminal match, such as below, because j_terminal has
2729                     # already been set.
2730                     #    if ($tag) { $tago = "<$tag>"; $tagc = "</$tag>"; }
2731                     #    else      { $tago = $tagc = ''; }
2732                     # But see snippets 'else1.t' and 'else2.t'
2733                     $delete_me = 0
2734                       if ( $jj == $jbeg
2735                         && $has_terminal_match
2736                         && $nlines == 2 );
2737
2738                     # EXCEPTION 5: misc additional rules for commas and equals
2739                     if ($delete_me) {
2740
2741                         # okay to delete second and higher copies of a token
2742                         if ( $tok_count == 1 ) {
2743
2744                             # for a comma...
2745                             if ( $raw_tok eq ',' ) {
2746
2747                                 # Do not delete commas before an equals
2748                                 $delete_me = 0
2749                                   if ( defined($i_eq) && $i < $i_eq );
2750
2751                                 # Do not delete line-level commas
2752                                 $delete_me = 0 if ( $lev <= $group_level );
2753                             }
2754
2755                             # For an assignment at group level..
2756                             if (   $is_assignment{$raw_tok}
2757                                 && $lev == $group_level )
2758                             {
2759
2760                                 # Do not delete if it is the last alignment of
2761                                 # multiple tokens; this will prevent some
2762                                 # undesirable alignments
2763                                 if ( $imax > 0 && $i == $imax ) {
2764                                     $delete_me = 0;
2765                                 }
2766
2767                                 # Otherwise, set a flag to delete most
2768                                 # remaining tokens
2769                                 else { $deleted_assignment_token = $raw_tok }
2770                             }
2771                         }
2772                     }
2773
2774                     #####################################
2775                     # Add this token to the deletion list
2776                     #####################################
2777                     if ($delete_me) {
2778                         push @idel, $i;
2779
2780                         # update deletion propagation flags
2781                         if ( !defined($delete_above_level)
2782                             || $lev < $delete_above_level )
2783                         {
2784
2785                             # delete all following higher level alignments
2786                             $delete_above_level = $lev;
2787
2788                             # but keep deleting after => to next lower level
2789                             # to avoid some bizarre alignments
2790                             if ( $raw_tok eq '=>' ) {
2791                                 $delete_above_level = $lev - 1;
2792                             }
2793                         }
2794                     }
2795                 }    # End loop over alignment tokens
2796
2797                 # Process all deletion requests for this line
2798                 if (@idel) {
2799                     delete_selected_tokens( $line, \@idel );
2800                 }
2801             }    # End loopover lines
2802         }    # End loop over subgroups
2803
2804         #################################################
2805         # PASS 2 over subgroups to remove null alignments
2806         #################################################
2807
2808         # This pass is only used for testing. It is helping to identify
2809         # alignment situations which might be improved with a future more
2810         # general algorithm which adds a tail matching capability.
2811         if (TEST_DELETE_NULL) {
2812             delete_null_alignments( $rnew_lines, $rline_hashes, \@subgroups )
2813               if ($saw_large_group);
2814         }
2815
2816         # PASS 3: Construct a tree of matched lines and delete some small deeper
2817         # levels of tokens.  They also block good alignments.
2818         prune_alignment_tree($rnew_lines) if ($max_lev_diff);
2819
2820         # PASS 4: compare all lines for common tokens
2821         match_line_pairs( $rlines, $rnew_lines, \@subgroups, $group_level );
2822
2823         return ( $max_lev_diff, $saw_side_comment );
2824     }
2825 }
2826
2827 sub delete_null_alignments {
2828     my ( $rnew_lines, $rline_hashes, $rsubgroups ) = @_;
2829
2830     # This is an optional second pass for deleting alignment tokens which can
2831     # occasionally improve alignment.  We look for and remove 'null
2832     # alignments', which are alignments that require no padding.  So we can
2833     # 'cheat' and delete them. For example, notice the '=~' alignment in the
2834     # first two lines of the following code:
2835
2836     #    $sysname .= 'del' if $self->label =~ /deletion/;
2837     #    $sysname .= 'ins' if $self->label =~ /insertion/;
2838     #    $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq;
2839
2840     # These '=~' tokens are already aligned because they are both the same
2841     # distance from the previous alignment token, the 'if'.  So we can
2842     # eliminate them as alignments.  The advantage is that in some cases, such
2843     # as this one, this will allow other tokens to be aligned. In this case we
2844     # then get the 'if' tokens to align:
2845
2846     #   $sysname .= 'del'                     if $self->label =~ /deletion/;
2847     #   $sysname .= 'ins'                     if $self->label =~ /insertion/;
2848     #   $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq;
2849
2850     # The following rules for limiting this operation have been found to
2851     # work well and avoid problems:
2852
2853     # Rule 1. We only consider a sequence of lines which have the same
2854     # sequence of alignment tokens.
2855
2856     # Rule 2. We never eliminate the first alignment token.  One reason is that
2857     # lines may have different leading indentation spaces, so keeping the
2858     # first alignment token insures that our length measurements start at
2859     # a well-defined point.  Another reason is that nothing is gained because
2860     # the left-to-right sweep can always handle alignment of this token.
2861
2862     # Rule 3. We require that the first alignment token exist in either
2863     # a previous line or a subsequent line.  The reason is that this avoids
2864     # changing two-line matches which go through special logic.
2865
2866     # Rule 4. Do not delete a token which occurs in a previous or subsequent
2867     # line. For example, in the above example, it was ok to eliminate the '=~'
2868     # token from two lines because it did not occur in a surrounding line.
2869     # If it did occur in a surrounding line, the result could be confusing
2870     # or even incorrectly aligned.
2871
2872     # A consequence of these rules is that we only need to consider subgroups
2873     # with at least 3 lines and 2 alignment tokens.
2874
2875     # The subgroup line index range
2876     my ( $jbeg, $jend );
2877
2878     # Vars to keep track of the start of a current sequence of matching
2879     # lines.
2880     my $rtokens_match;
2881     my $rfield_lengths_match;
2882     my $j_match_beg;
2883     my $j_match_end;
2884     my $imax_match;
2885     my $rneed_pad;
2886
2887     # Vars for a line being tested
2888     my $rtokens;
2889     my $rfield_lengths;
2890     my $imax;
2891
2892     my $start_match = sub {
2893         my ($jj) = @_;
2894         $rtokens_match        = $rtokens;
2895         $rfield_lengths_match = $rfield_lengths;
2896         $j_match_beg          = $jj;
2897         $j_match_end          = $jj;
2898         $imax_match           = $imax;
2899         $rneed_pad            = [];
2900         return;
2901     };
2902
2903     my $add_to_match = sub {
2904         my ($jj) = @_;
2905         $j_match_end = $jj;
2906
2907         # Keep track of any padding that would be needed for each token
2908         for ( my $i = 0 ; $i <= $imax ; $i++ ) {
2909             next if ( $rneed_pad->[$i] );
2910             my $length       = $rfield_lengths->[$i];
2911             my $length_match = $rfield_lengths_match->[$i];
2912             if ( $length ne $length_match ) { $rneed_pad->[$i] = 1 }
2913         }
2914     };
2915
2916     my $end_match = sub {
2917         return unless ( $j_match_end > $j_match_beg );
2918         my $nlines    = $j_match_end - $j_match_beg + 1;
2919         my $rhash_beg = $rline_hashes->[$j_match_beg];
2920         my $rhash_end = $rline_hashes->[$j_match_end];
2921         my @idel;
2922
2923         # Do not delete unless the first token also occurs in a surrounding line
2924         my $tok0 = $rtokens_match->[0];
2925         return
2926           unless (
2927             (
2928                    $j_match_beg > $jbeg
2929                 && $rnew_lines->[ $j_match_beg - 1 ]->get_rtokens()->[0] eq
2930                 $tok0
2931             )
2932             || (   $j_match_end < $jend
2933                 && $rnew_lines->[ $j_match_end + 1 ]->get_rtokens()->[0] eq
2934                 $tok0 )
2935           );
2936
2937         # Note that we are skipping the token at i=0
2938         for ( my $i = 1 ; $i <= $imax_match ; $i++ ) {
2939
2940             # do not delete a token which requires padding to align
2941             next if ( $rneed_pad->[$i] );
2942
2943             my $tok = $rtokens_match->[$i];
2944
2945             # Do not delete a token which occurs in a surrounding line
2946             next
2947               if ( $j_match_beg > $jbeg
2948                 && defined( $rline_hashes->[ $j_match_beg - 1 ]->{$tok} ) );
2949             next
2950               if ( $j_match_end < $jend
2951                 && defined( $rline_hashes->[ $j_match_end + 1 ]->{$tok} ) );
2952
2953             # ok to delete
2954             push @idel, $i;
2955             ##print "ok to delete tok=$tok\n";
2956         }
2957         if (@idel) {
2958             foreach my $j ( $j_match_beg .. $j_match_end ) {
2959                 delete_selected_tokens( $rnew_lines->[$j], \@idel );
2960             }
2961         }
2962     };
2963
2964     foreach my $item ( @{$rsubgroups} ) {
2965         ( $jbeg, $jend ) = @{$item};
2966         my $nlines = $jend - $jbeg + 1;
2967         next unless ( $nlines > 2 );
2968
2969         for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
2970             my $line = $rnew_lines->[$jj];
2971             $rtokens        = $line->get_rtokens();
2972             $rfield_lengths = $line->get_rfield_lengths();
2973             $imax           = @{$rtokens} - 2;
2974
2975             # start a new match group
2976             if ( $jj == $jbeg ) {
2977                 $start_match->($jj);
2978                 next;
2979             }
2980
2981             # see if all tokens of this line match the current group
2982             my $match;
2983             if ( $imax == $imax_match ) {
2984                 for ( my $i = 0 ; $i <= $imax ; $i++ ) {
2985                     my $tok       = $rtokens->[$i];
2986                     my $tok_match = $rtokens_match->[$i];
2987                     last if ( $tok ne $tok_match );
2988                 }
2989                 $match = 1;
2990             }
2991
2992             # yes, they all match
2993             if ($match) {
2994                 $add_to_match->($jj);
2995             }
2996
2997             # now, this line does not match
2998             else {
2999                 $end_match->();
3000                 $start_match->($jj);
3001             }
3002         }    # End loopover lines
3003         $end_match->();
3004     }    # End loop over subgroups
3005     return;
3006 } ## end sub delete_null_alignments
3007
3008 sub match_line_pairs {
3009     my ( $rlines, $rnew_lines, $rsubgroups, $group_level ) = @_;
3010
3011     # Compare each pair of lines and save information about common matches
3012     # $rlines     = list of lines including hanging side comments
3013     # $rnew_lines = list of lines without any hanging side comments
3014     # $rsubgroups = list of subgroups of the new lines
3015
3016     # TODO:
3017     # Maybe change: imax_pair => pair_match_info = ref to array
3018     #  = [$imax_align, $rMsg, ... ]
3019     #  This may eventually have multi-level match info
3020
3021     # Previous line vars
3022     my ( $line_m, $rtokens_m, $rpatterns_m, $rfield_lengths_m, $imax_m,
3023         $list_type_m, $ci_level_m );
3024
3025     # Current line vars
3026     my ( $line, $rtokens, $rpatterns, $rfield_lengths, $imax, $list_type,
3027         $ci_level );
3028
3029     use constant EXPLAIN_COMPARE_PATTERNS => 0;
3030
3031     my $compare_patterns = sub {
3032
3033         # helper routine to decide if patterns match well enough..
3034         # return code:
3035         #   0 = patterns match, continue
3036         #   1 = no match
3037         #   2 = no match, and lines do not match at all
3038
3039         my ( $tok, $tok_m, $pat, $pat_m, $pad ) = @_;
3040         my $GoToMsg     = "";
3041         my $return_code = 1;
3042
3043         my ( $alignment_token, $lev, $tag, $tok_count ) =
3044           decode_alignment_token($tok);
3045
3046         # We have to be very careful about aligning commas
3047         # when the pattern's don't match, because it can be
3048         # worse to create an alignment where none is needed
3049         # than to omit one.  Here's an example where the ','s
3050         # are not in named containers.  The first line below
3051         # should not match the next two:
3052         #   ( $a, $b ) = ( $b, $r );
3053         #   ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
3054         #   ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
3055         if ( $alignment_token eq ',' ) {
3056
3057             # do not align commas unless they are in named
3058             # containers
3059             $GoToMsg = "do not align commas in unnamed containers";
3060             goto NO_MATCH unless ( $tok =~ /[A-Za-z]/ );
3061         }
3062
3063         # do not align parens unless patterns match;
3064         # large ugly spaces can occur in math expressions.
3065         elsif ( $alignment_token eq '(' ) {
3066
3067             # But we can allow a match if the parens don't
3068             # require any padding.
3069             $GoToMsg = "do not align '(' unless patterns match or pad=0";
3070             if ( $pad != 0 ) { goto NO_MATCH }
3071         }
3072
3073         # Handle an '=' alignment with different patterns to
3074         # the left.
3075         elsif ( $alignment_token eq '=' ) {
3076
3077             # It is best to be a little restrictive when
3078             # aligning '=' tokens.  Here is an example of
3079             # two lines that we will not align:
3080             #       my $variable=6;
3081             #       $bb=4;
3082             # The problem is that one is a 'my' declaration,
3083             # and the other isn't, so they're not very similar.
3084             # We will filter these out by comparing the first
3085             # letter of the pattern.  This is crude, but works
3086             # well enough.
3087             if ( substr( $pat_m, 0, 1 ) ne substr( $pat, 0, 1 ) ) {
3088                 $GoToMsg = "first character before equals differ";
3089                 goto NO_MATCH;
3090             }
3091
3092             # The introduction of sub 'prune_alignment_tree'
3093             # enabled alignment of lists left of the equals with
3094             # other scalar variables. For example:
3095             # my ( $D, $s, $e ) = @_;
3096             # my $d             = length $D;
3097             # my $c             = $e - $s - $d;
3098
3099             # But this would change formatting of a lot of scripts,
3100             # so for now we prevent alignment of comma lists on the
3101             # left with scalars on the left.  We will also prevent
3102             # any partial alignments.
3103
3104           # set return code 2 if the = is at line level, but
3105           # set return code 1 if the = is below line level, i.e.
3106           #    sub new { my ( $p, $v ) = @_; bless \$v, $p }
3107           #    sub iter { my ($x) = @_; return undef if $$x < 0; return $$x--; }
3108
3109             elsif (
3110                 ( index( $pat_m, ',' ) >= 0 ) ne ( index( $pat, ',' ) >= 0 ) )
3111             {
3112                 $GoToMsg = "mixed commas/no-commas before equals";
3113                 if ( $lev eq $group_level ) {
3114                     $return_code = 2;
3115                 }
3116                 goto NO_MATCH;
3117             }
3118         }
3119
3120       MATCH:
3121         return ( 0, \$GoToMsg );
3122
3123       NO_MATCH:
3124
3125         EXPLAIN_COMPARE_PATTERNS
3126           && print STDERR "no match because $GoToMsg\n";
3127
3128         return ( $return_code, \$GoToMsg );
3129
3130     };    ## end of $compare_patterns->()
3131
3132     # loop over subgroups
3133     foreach my $item ( @{$rsubgroups} ) {
3134         my ( $jbeg, $jend ) = @{$item};
3135         my $nlines = $jend - $jbeg + 1;
3136         next unless ( $nlines > 1 );
3137
3138         # loop over lines in a subgroup
3139         for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
3140
3141             $line_m           = $line;
3142             $rtokens_m        = $rtokens;
3143             $rpatterns_m      = $rpatterns;
3144             $rfield_lengths_m = $rfield_lengths;
3145             $imax_m           = $imax;
3146             $list_type_m      = $list_type;
3147             $ci_level_m       = $ci_level;
3148
3149             $line           = $rnew_lines->[$jj];
3150             $rtokens        = $line->get_rtokens();
3151             $rpatterns      = $line->get_rpatterns();
3152             $rfield_lengths = $line->get_rfield_lengths();
3153             $imax           = @{$rtokens} - 2;
3154             $list_type      = $line->get_list_type();
3155             $ci_level       = $line->get_ci_level();
3156
3157             # nothing to do for first line
3158             next if ( $jj == $jbeg );
3159
3160             my $ci_jump = $ci_level - $ci_level_m;
3161
3162             my $imax_min = $imax_m < $imax ? $imax_m : $imax;
3163
3164             my $imax_align = -1;
3165
3166             # find number of leading common tokens
3167
3168             #################################
3169             # No match to hanging side comment
3170             #################################
3171             if ( $line->get_is_hanging_side_comment() ) {
3172
3173                 # Should not get here; HSC's have been filtered out
3174                 $imax_align = -1;
3175             }
3176
3177             ##############################
3178             # Handle comma-separated lists
3179             ##############################
3180             elsif ( $list_type && $list_type eq $list_type_m ) {
3181
3182                 # do not align lists across a ci jump with new list method
3183                 if ($ci_jump) { $imax_min = -1 }
3184
3185                 my $i_nomatch = $imax_min + 1;
3186                 for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
3187                     my $tok   = $rtokens->[$i];
3188                     my $tok_m = $rtokens_m->[$i];
3189                     if ( $tok ne $tok_m ) {
3190                         $i_nomatch = $i;
3191                         last;
3192                     }
3193                 }
3194
3195                 $imax_align = $i_nomatch - 1;
3196             }
3197
3198             ##################
3199             # Handle non-lists
3200             ##################
3201             else {
3202                 my $i_nomatch = $imax_min + 1;
3203                 for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
3204                     my $tok   = $rtokens->[$i];
3205                     my $tok_m = $rtokens_m->[$i];
3206                     if ( $tok ne $tok_m ) {
3207                         $i_nomatch = $i;
3208                         last;
3209                     }
3210
3211                     my $pat   = $rpatterns->[$i];
3212                     my $pat_m = $rpatterns_m->[$i];
3213
3214                     # If patterns don't match, we have to be careful...
3215                     if ( $pat_m ne $pat ) {
3216                         my $pad =
3217                           $rfield_lengths->[$i] - $rfield_lengths_m->[$i];
3218                         my ( $match_code, $rmsg ) = $compare_patterns->(
3219                             $tok, $tok_m, $pat, $pat_m, $pad
3220                         );
3221                         if ($match_code) {
3222                             if    ( $match_code eq 1 ) { $i_nomatch = $i }
3223                             elsif ( $match_code eq 2 ) { $i_nomatch = 0 }
3224                             last;
3225                         }
3226                     }
3227                 }
3228                 $imax_align = $i_nomatch - 1;
3229             }
3230
3231             $line_m->set_imax_pair($imax_align);
3232
3233         } ## end loop over lines
3234
3235         # Put fence at end of subgroup
3236         $line->set_imax_pair(-1);
3237
3238     } ## end loop over subgroups
3239
3240     # if there are hanging side comments, propagate the pair info down to them
3241     # so that lines can just look back one line for their pair info.
3242     if ( @{$rlines} > @{$rnew_lines} ) {
3243         my $last_pair_info = -1;
3244         foreach my $line ( @{$rlines} ) {
3245             if ( $line->get_is_hanging_side_comment() ) {
3246                 $line->set_imax_pair($last_pair_info);
3247             }
3248             else {
3249                 $last_pair_info = $line->get_imax_pair();
3250             }
3251         }
3252     }
3253     return;
3254 }
3255
3256 sub fat_comma_to_comma {
3257     my ($str) = @_;
3258
3259     # We are changing '=>' to ',' and removing any trailing decimal count
3260     # because currently fat commas have a count and commas do not.
3261     # For example, we will change '=>2+{-3.2' into ',2+{-3'
3262     if ( $str =~ /^=>([^\.]*)/ ) { $str = ',' . $1 }
3263     return $str;
3264 }
3265
3266 sub get_line_token_info {
3267
3268     # scan lines of tokens and return summary information about the range of
3269     # levels and patterns.
3270     my ($rlines) = @_;
3271
3272     # First scan to check monotonicity. Here is an example of several
3273     # lines which are monotonic. The = is the lowest level, and
3274     # the commas are all one level deeper. So this is not nonmonotonic.
3275     #  $$d{"weeks"}   = [ "w",  "wk",  "wks", "week", "weeks" ];
3276     #  $$d{"days"}    = [ "d",  "day", "days" ];
3277     #  $$d{"hours"}   = [ "h",  "hr",  "hrs", "hour", "hours" ];
3278     my @all_token_info;
3279     my $all_monotonic = 1;
3280     for ( my $jj = 0 ; $jj < @{$rlines} ; $jj++ ) {
3281         my ($line) = $rlines->[$jj];
3282         my $rtokens = $line->get_rtokens();
3283         my $last_lev;
3284         my $is_monotonic = 1;
3285         my $i            = -1;
3286         foreach my $tok ( @{$rtokens} ) {
3287             $i++;
3288             my ( $raw_tok, $lev, $tag, $tok_count ) =
3289               decode_alignment_token($tok);
3290             push @{ $all_token_info[$jj] },
3291               [ $raw_tok, $lev, $tag, $tok_count ];
3292             last if ( $tok eq '#' );
3293             if ( $i > 0 && $lev < $last_lev ) { $is_monotonic = 0 }
3294             $last_lev = $lev;
3295         }
3296         if ( !$is_monotonic ) { $all_monotonic = 0 }
3297     }
3298
3299     my $rline_values = [];
3300     for ( my $jj = 0 ; $jj < @{$rlines} ; $jj++ ) {
3301         my ($line) = $rlines->[$jj];
3302
3303         my $rtokens = $line->get_rtokens();
3304         my $i       = -1;
3305         my ( $lev_min, $lev_max );
3306         my $token_pattern_max = "";
3307         my %saw_level;
3308         my @token_info;
3309         my $is_monotonic = 1;
3310
3311         # find the index of the last token before the side comment
3312         my $imax      = @{$rtokens} - 2;
3313         my $imax_true = $imax;
3314
3315         # If the entire group is monotonic, and the line ends in a comma list,
3316         # walk it back to the first such comma. this will have the effect of
3317         # making all trailing ragged comma lists match in the prune tree
3318         # routine.  these trailing comma lists can better be handled by later
3319         # alignment rules.
3320
3321         # Treat fat commas the same as commas here by converting them to
3322         # commas.  This will improve the chance of aligning the leading parts
3323         # of ragged lists.
3324
3325         my $tok_end = fat_comma_to_comma( $rtokens->[$imax] );
3326         if ( $all_monotonic && $tok_end =~ /^,/ ) {
3327             my $i = $imax - 1;
3328             while ( $i >= 0
3329                 && fat_comma_to_comma( $rtokens->[$i] ) eq $tok_end )
3330             {
3331                 $imax = $i;
3332                 $i--;
3333             }
3334         }
3335
3336         # make a first pass to find level range
3337         my $last_lev;
3338         foreach my $tok ( @{$rtokens} ) {
3339             $i++;
3340             last if ( $i > $imax );
3341             last if ( $tok eq '#' );
3342             my ( $raw_tok, $lev, $tag, $tok_count ) =
3343               @{ $all_token_info[$jj]->[$i] };
3344
3345             last if ( $tok eq '#' );
3346             $token_pattern_max .= $tok;
3347             $saw_level{$lev}++;
3348             if ( !defined($lev_min) ) {
3349                 $lev_min = $lev;
3350                 $lev_max = $lev;
3351             }
3352             else {
3353                 if ( $lev < $lev_min )  { $lev_min      = $lev; }
3354                 if ( $lev > $lev_max )  { $lev_max      = $lev; }
3355                 if ( $lev < $last_lev ) { $is_monotonic = 0 }
3356             }
3357             $last_lev = $lev;
3358         }
3359
3360         # handle no levels
3361         my $rtoken_patterns = {};
3362         my $rtoken_indexes  = {};
3363         my @levs            = sort keys %saw_level;
3364         if ( !defined($lev_min) ) {
3365             $lev_min                     = -1;
3366             $lev_max                     = -1;
3367             $levs[0]                     = -1;
3368             $rtoken_patterns->{$lev_min} = "";
3369             $rtoken_indexes->{$lev_min}  = [];
3370         }
3371
3372         # handle one level
3373         elsif ( $lev_max == $lev_min ) {
3374             $rtoken_patterns->{$lev_max} = $token_pattern_max;
3375             $rtoken_indexes->{$lev_max}  = [ ( 0 .. $imax ) ];
3376         }
3377
3378         # handle multiple levels
3379         else {
3380             $rtoken_patterns->{$lev_max} = $token_pattern_max;
3381             $rtoken_indexes->{$lev_max}  = [ ( 0 .. $imax ) ];
3382
3383             my $debug   = 0;
3384             my $lev_top = pop @levs;    # alread did max level
3385             my $itok    = -1;
3386             foreach my $tok ( @{$rtokens} ) {
3387                 $itok++;
3388                 last if ( $itok > $imax );
3389                 my ( $raw_tok, $lev, $tag, $tok_count ) =
3390                   @{ $all_token_info[$jj]->[$itok] };
3391                 last if ( $raw_tok eq '#' );
3392                 foreach my $lev_test (@levs) {
3393                     next if ( $lev > $lev_test );
3394                     $rtoken_patterns->{$lev_test} .= $tok;
3395                     push @{ $rtoken_indexes->{$lev_test} }, $itok;
3396                 }
3397             }
3398             push @levs, $lev_top;
3399         }
3400
3401         push @{$rline_values},
3402           [
3403             $lev_min,        $lev_max,      $rtoken_patterns, \@levs,
3404             $rtoken_indexes, $is_monotonic, $imax_true,       $imax,
3405           ];
3406
3407         # debug
3408         0 && do {
3409             local $" = ')(';
3410             print "lev_min=$lev_min, lev_max=$lev_max, levels=(@levs)\n";
3411             foreach my $key ( sort keys %{$rtoken_patterns} ) {
3412                 print "$key => $rtoken_patterns->{$key}\n";
3413                 print "$key => @{$rtoken_indexes->{$key}}\n";
3414             }
3415         };
3416     } ## end loop over lines
3417     return ( $rline_values, $all_monotonic );
3418 }
3419
3420 sub prune_alignment_tree {
3421     my ($rlines) = @_;
3422     my $jmax = @{$rlines} - 1;
3423     return unless $jmax > 0;
3424
3425     # Vertical alignment in perltidy is done as an iterative process.  The
3426     # starting point is to mark all possible alignment tokens ('=', ',', '=>',
3427     # etc) for vertical alignment.  Then we have to delete all alignments
3428     # which, if actually made, would detract from overall alignment.  This
3429     # is done in several phases of which this is one.
3430
3431     # In this routine we look at the alignments of a group of lines as a
3432     # hierarchical tree.  We will 'prune' the tree to limited depths if that
3433     # will improve overall alignment at the lower depths.
3434     # For each line we will be looking at its alignment patterns down to
3435     # different fixed depths. For each depth, we include all lower depths and
3436     # ignore all higher depths.  We want to see if we can get alignment of a
3437     # larger group of lines if we ignore alignments at some lower depth.
3438     # Here is an # example:
3439
3440     # for (
3441     #     [ '$var',     sub { join $_, "bar" },            0, "bar" ],
3442     #     [ 'CONSTANT', sub { join "foo", "bar" },         0, "bar" ],
3443     #     [ 'CONSTANT', sub { join "foo", "bar", 3 },      1, "barfoo3" ],
3444     #     [ '$myvar',   sub { my $var; join $var, "bar" }, 0, "bar" ],
3445     # );
3446
3447     # In the above example, all lines have three commas at the lowest depth
3448     # (zero), so if there were no other alignements, these lines would all
3449     # align considering only the zero depth alignment token.  But some lines
3450     # have additional comma alignments at the next depth, so we need to decide
3451     # if we should drop those to keep the top level alignments, or keep those
3452     # for some additional low level alignments at the expense losing some top
3453     # level alignments.  In this case we will drop the deeper level commas to
3454     # keep the entire collection aligned.  But in some cases the decision could
3455     # go the other way.
3456
3457     # The tree for this example at the zero depth has one node containing
3458     # all four lines, since they are identical at zero level (three commas).
3459     # At depth one, there are three 'children' nodes, namely:
3460     # - lines 1 and 2, which have a single comma in the 'sub' at depth 1
3461     # - line 3, which has 2 commas at depth 1
3462     # - line4, which has a ';' and a ',' at depth 1
3463     # There are no deeper alignments in this example.
3464     # so the tree structure for this example is:
3465     #
3466     #    depth 0         depth 1      depth 2
3467     #    [lines 1-4] --  [line 1-2] -  (empty)
3468     #                 |  [line 3]   -  (empty)
3469     #                 |  [line 4]   -  (empty)
3470
3471     # We can carry this to any depth, but it is not really useful to go below
3472     # depth 2. To cleanly stop there, we will consider depth 2 to contain all
3473     # alignments at depth >=2.
3474
3475     use constant EXPLAIN_PRUNE => 0;
3476
3477     ####################################################################
3478     # Prune Tree Step 1. Start by scanning the lines and collecting info
3479     ####################################################################
3480
3481     # Note that the caller had this info but we have to redo this now because
3482     # alignment tokens may have been deleted.
3483     my ( $rline_values, $all_monotonic ) = get_line_token_info($rlines);
3484
3485     # If all the lines have levels which increase monotonically from left to
3486     # right, then the sweep-left-to-right pass can do a better job of alignment
3487     # than pruning, and without deleting alignments.
3488     return if ($all_monotonic);
3489
3490     # Contents of $rline_values
3491     #   [
3492     #     $lev_min,        $lev_max,      $rtoken_patterns, \@levs,
3493     #     $rtoken_indexes, $is_monotonic, $imax_true,       $imax,
3494     #   ];
3495
3496     # We can work to any depth, but there is little advantage to working
3497     # to a a depth greater than 2
3498     my $MAX_DEPTH = 2;
3499
3500     # This arrays will hold the tree of alignment tokens at different depths
3501     # for these lines.
3502     my @match_tree;
3503
3504     # Tree nodes contain these values:
3505     # $match_tree[$depth] = [$jbeg, $jend, $n_parent, $level, $pattern,
3506     #                        $nc_beg_p, $nc_end_p, $rindexes];
3507     # where
3508     #      $depth = 0,1,2 = index of depth of the match
3509
3510     #  $jbeg beginning index j of the range of lines in this match
3511     #  $jend ending index j of the range of lines in this match
3512     #  $n_parent = index of the containing group at $depth-1, if it exists
3513     #  $level = actual level of code being matched in this group
3514     #  $pattern = alignment pattern being matched
3515     #  $nc_beg_p = first child
3516     #  $nc_end_p = last child
3517     #  $rindexes = ref to token indexes
3518
3519     # the patterns and levels of the current group being formed at each depth
3520     my ( @token_patterns_current, @levels_current, @token_indexes_current );
3521
3522     # the patterns and levels of the next line being tested at each depth
3523     my ( @token_patterns_next, @levels_next, @token_indexes_next );
3524
3525     #########################################################
3526     # define a recursive worker subroutine for tree construction
3527     #########################################################
3528
3529     # This is a recursive routine which is called if a match condition changes
3530     # at any depth when a new line is encountered.  It ends the match node
3531     # which changed plus all deeper nodes attached to it.
3532     my $end_node;
3533     $end_node = sub {
3534         my ( $depth, $jl, $n_parent ) = @_;
3535
3536         # $depth is the tree depth
3537         # $jl is the  index of the line
3538         # $n_parent is index of the parent node of this node
3539
3540         return if ( $depth > $MAX_DEPTH );
3541
3542         # end any current group at this depth
3543         if (   $jl >= 0
3544             && defined( $match_tree[$depth] )
3545             && @{ $match_tree[$depth] }
3546             && defined( $levels_current[$depth] ) )
3547         {
3548             $match_tree[$depth]->[-1]->[1] = $jl;
3549         }
3550
3551         # Define the index of the node we will create below
3552         my $ng_self = 0;
3553         if ( defined( $match_tree[$depth] ) ) {
3554             $ng_self = @{ $match_tree[$depth] };
3555         }
3556
3557         # end any next deeper child node(s)
3558         $end_node->( $depth + 1, $jl, $ng_self );
3559
3560         # update the levels being matched
3561         $token_patterns_current[$depth] = $token_patterns_next[$depth];
3562         $token_indexes_current[$depth]  = $token_indexes_next[$depth];
3563         $levels_current[$depth]         = $levels_next[$depth];
3564
3565         # Do not start a new group at this level if it is not being used
3566         if ( !defined( $levels_next[$depth] )
3567             || $depth > 0
3568             && $levels_next[$depth] <= $levels_next[ $depth - 1 ] )
3569         {
3570             return;
3571         }
3572
3573         # Create a node for the next group at this depth. We initially assume
3574         # that it will continue to $jmax, and correct that later if the node
3575         # ends earlier.
3576         push @{ $match_tree[$depth] },
3577           [
3578             $jl + 1, $jmax, $n_parent, $levels_current[$depth],
3579             $token_patterns_current[$depth],
3580             undef, undef, $token_indexes_current[$depth],
3581           ];
3582
3583         return;
3584     };    ## end sub end_node
3585
3586     ######################################################
3587     # Prune Tree Step 2. Loop to form the tree of matches.
3588     ######################################################
3589     for ( my $jp = 0 ; $jp <= $jmax ; $jp++ ) {
3590
3591         # working with two adjacent line indexes, 'm'=minus, 'p'=plus
3592         my $jm = $jp - 1;
3593
3594         # Pull out needed values for the next line
3595         my ( $lev_min, $lev_max, $rtoken_patterns, $rlevs, $rtoken_indexes,
3596             $is_monotonic, $imax_true, $imax )
3597           = @{ $rline_values->[$jp] };
3598
3599         # Transfer levels and patterns for this line to the working arrays.
3600         # If the number of levels differs from our chosen MAX_DEPTH ...
3601         # if fewer than MAX_DEPTH: leave levels at missing depths undefined
3602         # if more than MAX_DEPTH: set the MAX_DEPTH level to be the maximum
3603         @levels_next = @{$rlevs}[ 0 .. $MAX_DEPTH ];
3604         if ( @{$rlevs} > $MAX_DEPTH ) {
3605             $levels_next[$MAX_DEPTH] = $rlevs->[-1];
3606         }
3607         my $depth = 0;
3608         foreach (@levels_next) {
3609             $token_patterns_next[$depth] =
3610               defined($_) ? $rtoken_patterns->{$_} : undef;
3611             $token_indexes_next[$depth] =
3612               defined($_) ? $rtoken_indexes->{$_} : undef;
3613             $depth++;
3614         }
3615
3616         # Look for a change in match groups...
3617
3618         # Initialize on the first line
3619         if ( $jp == 0 ) {
3620             my $n_parent;
3621             $end_node->( 0, $jm, $n_parent );
3622         }
3623
3624         # End groups if a hard flag has been set
3625         elsif ( $rlines->[$jm]->get_end_group() ) {
3626             my $n_parent;
3627             $end_node->( 0, $jm, $n_parent );
3628         }
3629
3630         # Continue at hanging side comment
3631         elsif ( $rlines->[$jp]->get_is_hanging_side_comment() ) {
3632             next;
3633         }
3634
3635         # Otherwise see if anything changed and update the tree if so
3636         else {
3637             foreach my $depth ( 0 .. $MAX_DEPTH ) {
3638
3639                 my $def_current = defined( $token_patterns_current[$depth] );
3640                 my $def_next    = defined( $token_patterns_next[$depth] );
3641                 last unless ( $def_current || $def_next );
3642                 if (   !$def_current
3643                     || !$def_next
3644                     || $token_patterns_current[$depth] ne
3645                     $token_patterns_next[$depth] )
3646                 {
3647                     my $n_parent;
3648                     if ( $depth > 0 && defined( $match_tree[ $depth - 1 ] ) ) {
3649                         $n_parent = @{ $match_tree[ $depth - 1 ] } - 1;
3650                     }
3651                     $end_node->( $depth, $jm, $n_parent );
3652                     last;
3653                 }
3654             }
3655         }
3656     } ## end loop to form tree of matches
3657
3658     ##########################################################
3659     # Prune Tree Step 3. Make links from parent to child nodes
3660     ##########################################################
3661
3662     # It seemed cleaner to do this as a separate step rather than during tree
3663     # construction.  The children nodes have links up to the parent node which
3664     # created them.  Now make links in the opposite direction, so the parents
3665     # can find the children.  We store the range of children nodes ($nc_beg,
3666     # $nc_end) of each parent with two additional indexes in the orignal array.
3667     # These will be undef if no children.
3668     for ( my $depth = $MAX_DEPTH ; $depth > 0 ; $depth-- ) {
3669         next unless defined( $match_tree[$depth] );
3670         my $nc_max = @{ $match_tree[$depth] } - 1;
3671         my $np_now;
3672         foreach my $nc ( 0 .. $nc_max ) {
3673             my $np = $match_tree[$depth]->[$nc]->[2];
3674             if ( !defined($np) ) {
3675
3676                 # shouldn't happen
3677                 #print STDERR "lost child $np at depth $depth\n";
3678                 next;
3679             }
3680             if ( !defined($np_now) || $np != $np_now ) {
3681                 $np_now = $np;
3682                 $match_tree[ $depth - 1 ]->[$np]->[5] = $nc;
3683             }
3684             $match_tree[ $depth - 1 ]->[$np]->[6] = $nc;
3685         }
3686     } ## end loop to make links down to the child nodes
3687
3688     EXPLAIN_PRUNE > 0 && do {
3689         print "Tree complete. Found these groups:\n";
3690         foreach my $depth ( 0 .. $MAX_DEPTH ) {
3691             Dump_tree_groups( \@{ $match_tree[$depth] }, "depth=$depth" );
3692         }
3693     };
3694
3695     #######################################################
3696     # Prune Tree Step 4. Make a list of nodes to be deleted
3697     #######################################################
3698
3699     #  list of lines with tokens to be deleted:
3700     #  [$jbeg, $jend, $level_keep]
3701     #  $jbeg..$jend is the range of line indexes,
3702     #  $level_keep is the minimum level to keep
3703     my @delete_list;
3704
3705     #  Groups with ending comma lists and their range of sizes:
3706     #  $ragged_comma_group{$id} = [ imax_group_min, imax_group_max ]
3707     my %ragged_comma_group;
3708
3709     # Define a threshold line count for forcing a break
3710     my $nlines_break = 3;
3711
3712     # We work with a list of nodes to visit at the next deeper depth.
3713     my @todo_list;
3714     if ( defined( $match_tree[0] ) ) {
3715         @todo_list = ( 0 .. @{ $match_tree[0] } - 1 );
3716     }
3717
3718     for ( my $depth = 0 ; $depth <= $MAX_DEPTH ; $depth++ ) {
3719         last unless (@todo_list);
3720         my @todo_next;
3721         foreach my $np (@todo_list) {
3722             my ( $jbeg_p, $jend_p, $np_p, $lev_p, $pat_p, $nc_beg_p, $nc_end_p,
3723                 $rindexes_p )
3724               = @{ $match_tree[$depth]->[$np] };
3725             my $nlines_p = $jend_p - $jbeg_p + 1;
3726
3727             # nothing to do if no children
3728             next unless defined($nc_beg_p);
3729
3730             # Define the number of lines to either keep or delete a child node.
3731             # This is the key decision we have to make.  We want to delete
3732             # short runs of matched lines, and keep long runs.  It seems easier
3733             # for the eye to follow breaks in monotonic level changes than
3734             # non-monotonic level changes.  For example, the following looks
3735             # best if we delete the lower level alignments:
3736
3737             #  [1]                  ~~ [];
3738             #  [ ["foo"], ["bar"] ] ~~ [ qr/o/, qr/a/ ];
3739             #  [ qr/o/, qr/a/ ]     ~~ [ ["foo"], ["bar"] ];
3740             #  [ "foo", "bar" ]     ~~ [ qr/o/, qr/a/ ];
3741             #  [ qr/o/, qr/a/ ]     ~~ [ "foo", "bar" ];
3742             #  $deep1               ~~ $deep1;
3743
3744             # So we will use two thresholds.
3745             my $nmin_mono     = $depth + 2;
3746             my $nmin_non_mono = $depth + 6;
3747             if ( $nmin_mono > $nlines_p - 1 ) {
3748                 $nmin_mono = $nlines_p - 1;
3749             }
3750             if ( $nmin_non_mono > $nlines_p - 1 ) {
3751                 $nmin_non_mono = $nlines_p - 1;
3752             }
3753
3754             # loop to keep or delete each child node
3755             foreach my $nc ( $nc_beg_p .. $nc_end_p ) {
3756                 my ( $jbeg_c, $jend_c, $np_c, $lev_c, $pat_c, $nc_beg_c,
3757                     $nc_end_c )
3758                   = @{ $match_tree[ $depth + 1 ]->[$nc] };
3759                 my $nlines_c     = $jend_c - $jbeg_c + 1;
3760                 my $is_monotonic = $rline_values->[$jbeg_c]->[5];
3761                 my $nmin         = $is_monotonic ? $nmin_mono : $nmin_non_mono;
3762                 if ( $nlines_c < $nmin ) {
3763 ##print "deleting child, nlines=$nlines_c, nmin=$nmin\n";
3764                     push @delete_list, [ $jbeg_c, $jend_c, $lev_p ];
3765                 }
3766                 else {
3767 ##print "keeping child, nlines=$nlines_c, nmin=$nmin\n";
3768                     push @todo_next, $nc;
3769                 }
3770             }
3771         }
3772         @todo_list = @todo_next;
3773     } ## end loop to mark nodes to delete
3774
3775     #############################################################
3776     # Prune Tree Step 5. Loop to delete selected alignment tokens
3777     #############################################################
3778     foreach my $item (@delete_list) {
3779         my ( $jbeg, $jend, $level_keep ) = @{$item};
3780         foreach my $jj ( $jbeg .. $jend ) {
3781             my $line = $rlines->[$jj];
3782             my @idel;
3783             my $rtokens = $line->get_rtokens();
3784             my $imax    = @{$rtokens} - 2;
3785             for ( my $i = 0 ; $i <= $imax ; $i++ ) {
3786                 my $tok = $rtokens->[$i];
3787                 my ( $raw_tok, $lev, $tag, $tok_count ) =
3788                   decode_alignment_token($tok);
3789                 if ( $lev > $level_keep ) {
3790                     push @idel, $i;
3791                 }
3792             }
3793             if (@idel) {
3794                 delete_selected_tokens( $line, \@idel );
3795             }
3796         }
3797     } ## end loop to delete selected alignment tokens
3798
3799     return;
3800 } ## end sub prune_alignment_tree
3801
3802 sub Dump_tree_groups {
3803     my ( $rgroup, $msg ) = @_;
3804     print "$msg\n";
3805     local $" = ')(';
3806     foreach my $item ( @{$rgroup} ) {
3807         my @fix = @{$item};
3808         foreach (@fix) { $_ = "undef" unless defined $_; }
3809         $fix[4] = "...";
3810         print "(@fix)\n";
3811     }
3812     return;
3813 }
3814
3815 {    ## closure for sub is_marginal_match
3816
3817     my %is_if_or;
3818     my %is_assignment;
3819     my %is_good_alignment;
3820
3821     # This test did not give sufficiently better results to use as an update,
3822     # but the flag is worth keeping as a starting point for future testing.
3823     use constant TEST_MARGINAL_EQ_ALIGNMENT => 0;
3824
3825     BEGIN {
3826
3827         my @q = qw(
3828           if unless or ||
3829         );
3830         @is_if_or{@q} = (1) x scalar(@q);
3831
3832         @q = qw(
3833           = **= += *= &= <<= &&=
3834           -= /= |= >>= ||= //=
3835           .= %= ^=
3836           x=
3837         );
3838         @is_assignment{@q} = (1) x scalar(@q);
3839
3840         # Vertically aligning on certain "good" tokens is usually okay
3841         # so we can be less restrictive in marginal cases.
3842         @q = qw( { ? => = );
3843         push @q, (',');
3844         @is_good_alignment{@q} = (1) x scalar(@q);
3845     }
3846
3847     sub is_marginal_match {
3848
3849         my ( $line_0, $line_1, $group_level, $imax_align, $imax_prev ) = @_;
3850
3851         # Decide if we should undo some or all of the common alignments of a
3852         # group of just two lines.
3853
3854         # Given:
3855         #   $line_0 and $line_1 - the two lines
3856         #   $group_level = the indentation level of the group being processed
3857         #   $imax_align = the maximum index of the common alignment tokens
3858         #                 of the two lines
3859         #   $imax_prev  = the maximum index of the common alignment tokens
3860         #                 with the line before $line_0 (=-1 of does not exist)
3861
3862         # Return:
3863         #   $is_marginal = true if the two lines should NOT be fully aligned
3864         #                = false if the two lines can remain fully aligned
3865         #   $imax_align  = the index of the highest alignment token shared by
3866         #                  these two lines to keep if the match is marginal.
3867
3868         # When we have an alignment group of just two lines like this, we are
3869         # working in the twilight zone of what looks good and what looks bad.
3870         # This routine is a collection of rules which work have been found to
3871         # work fairly well, but it will need to be updated from time to time.
3872
3873         my $is_marginal = 0;
3874
3875         # always keep alignments of a terminal else or ternary
3876         goto RETURN if ( defined( $line_1->get_j_terminal_match() ) );
3877
3878         # always align lists
3879         my $group_list_type = $line_0->get_list_type();
3880         goto RETURN if ($group_list_type);
3881
3882         # always align hanging side comments
3883         my $is_hanging_side_comment = $line_1->get_is_hanging_side_comment();
3884         goto RETURN if ($is_hanging_side_comment);
3885
3886         my $jmax_0           = $line_0->get_jmax();
3887         my $jmax_1           = $line_1->get_jmax();
3888         my $rtokens_1        = $line_1->get_rtokens();
3889         my $rtokens_0        = $line_0->get_rtokens();
3890         my $rfield_lengths_0 = $line_0->get_rfield_lengths();
3891         my $rfield_lengths_1 = $line_1->get_rfield_lengths();
3892         my $rpatterns_0      = $line_0->get_rpatterns();
3893         my $rpatterns_1      = $line_1->get_rpatterns();
3894         my $imax_next        = $line_1->get_imax_pair();
3895
3896         # We will scan the alignment tokens and set a flag '$is_marginal' if
3897         # it seems that the an alignment would look bad.
3898         my $max_pad            = 0;
3899         my $saw_good_alignment = 0;
3900         my $saw_if_or;        # if we saw an 'if' or 'or' at group level
3901         my $raw_tokb = "";    # first token seen at group level
3902         my $jfirst_bad;
3903         my $line_ending_fat_comma;    # is last token just a '=>' ?
3904         my $j0_eq_pad;
3905         my $j0_max_pad = 0;
3906
3907         for ( my $j = 0 ; $j < $jmax_1 - 1 ; $j++ ) {
3908             my ( $raw_tok, $lev, $tag, $tok_count ) =
3909               decode_alignment_token( $rtokens_1->[$j] );
3910             if ( $raw_tok && $lev == $group_level ) {
3911                 if ( !$raw_tokb ) { $raw_tokb = $raw_tok }
3912                 $saw_if_or ||= $is_if_or{$raw_tok};
3913             }
3914
3915             # When the first of the two lines ends in a bare '=>' this will
3916             # probably be marginal match.  (For a bare =>, the next field length
3917             # will be 2 or 3, depending on side comment)
3918             $line_ending_fat_comma =
3919                  $j == $jmax_1 - 2
3920               && $raw_tok eq '=>'
3921               && $rfield_lengths_0->[ $j + 1 ] <= 3;
3922
3923             my $pad = $rfield_lengths_1->[$j] - $rfield_lengths_0->[$j];
3924             if ( $j == 0 ) {
3925                 $pad += $line_1->get_leading_space_count() -
3926                   $line_0->get_leading_space_count();
3927
3928                 # Remember the pad at a leading equals
3929                 if ( $raw_tok eq '=' && $lev == $group_level ) {
3930                     $j0_eq_pad = $pad;
3931                     $j0_max_pad =
3932                       0.5 * ( $rfield_lengths_1->[0] + $rfield_lengths_0->[0] );
3933                     $j0_max_pad = 4 if ( $j0_max_pad < 4 );
3934                 }
3935             }
3936
3937             if ( $pad < 0 )        { $pad     = -$pad }
3938             if ( $pad > $max_pad ) { $max_pad = $pad }
3939             if ( $is_good_alignment{$raw_tok} && !$line_ending_fat_comma ) {
3940                 $saw_good_alignment = 1;
3941             }
3942             else {
3943                 $jfirst_bad = $j unless defined($jfirst_bad);
3944             }
3945             if ( $rpatterns_0->[$j] ne $rpatterns_1->[$j] ) {
3946
3947                 # Flag this as a marginal match since patterns differ.
3948                 # Normally, we will not allow just two lines to match if
3949                 # marginal. But we can allow matching in some specific cases.
3950
3951                 $jfirst_bad  = $j if ( !defined($jfirst_bad) );
3952                 $is_marginal = 1  if ( $is_marginal == 0 );
3953                 if ( $raw_tok eq '=' ) {
3954
3955                     # Here is an example of a marginal match:
3956                     #       $done{$$op} = 1;
3957                     #       $op         = compile_bblock($op);
3958                     # The left tokens are both identifiers, but
3959                     # one accesses a hash and the other doesn't.
3960                     # We'll let this be a tentative match and undo
3961                     # it later if we don't find more than 2 lines
3962                     # in the group.
3963                     $is_marginal = 2;
3964                 }
3965             }
3966         }
3967
3968         $is_marginal = 1 if ( $is_marginal == 0 && $line_ending_fat_comma );
3969
3970         # Turn off the "marginal match" flag in some cases...
3971         # A "marginal match" occurs when the alignment tokens agree
3972         # but there are differences in the other tokens (patterns).
3973         # If we leave the marginal match flag set, then the rule is that we
3974         # will align only if there are more than two lines in the group.
3975         # We will turn of the flag if we almost have a match
3976         # and either we have seen a good alignment token or we
3977         # just need a small pad (2 spaces) to fit.  These rules are
3978         # the result of experimentation.  Tokens which misaligned by just
3979         # one or two characters are annoying.  On the other hand,
3980         # large gaps to less important alignment tokens are also annoying.
3981         if ( $is_marginal == 1
3982             && ( $saw_good_alignment || $max_pad < 3 ) )
3983         {
3984             $is_marginal = 0;
3985         }
3986
3987         # We will use the line endings to help decide on alignments...
3988         # See if the lines end with semicolons...
3989         my $sc_term0;
3990         my $sc_term1;
3991         if ( $jmax_0 < 1 || $jmax_1 < 1 ) {
3992
3993             # shouldn't happen
3994         }
3995         else {
3996             my $pat0 = $rpatterns_0->[ $jmax_0 - 1 ];
3997             my $pat1 = $rpatterns_1->[ $jmax_1 - 1 ];
3998             $sc_term0 = $pat0 =~ /;b?$/;
3999             $sc_term1 = $pat1 =~ /;b?$/;
4000         }
4001
4002         if ( !$is_marginal && !$sc_term0 ) {
4003
4004             # First line of assignment should be semicolon terminated.
4005             # For example, do not align here:
4006             #  $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
4007             #    $$href{-NUM_DIRS} = 0;
4008             if ( $is_assignment{$raw_tokb} ) {
4009                 $is_marginal = 1;
4010             }
4011         }
4012
4013         # Try to avoid some undesirable alignments of opening tokens
4014         # for example, the space between grep and { here:
4015         #  return map { ( $_ => $_ ) }
4016         #    grep     { /$handles/ } $self->_get_delegate_method_list;
4017         $is_marginal ||=
4018              ( $raw_tokb eq '(' || $raw_tokb eq '{' )
4019           && $jmax_1 == 2
4020           && $sc_term0 ne $sc_term1;
4021
4022         ########################################
4023         # return unless this is a marginal match
4024         ########################################
4025         goto RETURN if ( !$is_marginal );
4026
4027         # Undo the marginal match flag in certain cases,
4028
4029         # Two lines with a leading equals-like operator are allowed to
4030         # align if the patterns to the left of the equals are the same.
4031         # For example the following two lines are a marginal match but have
4032         # the same left side patterns, so we will align the equals.
4033         #     my $orig = my $format = "^<<<<< ~~\n";
4034         #     my $abc  = "abc";
4035         # But these have a different left pattern so they will not be
4036         # aligned
4037         #     $xmldoc .= $`;
4038         #     $self->{'leftovers'} .= "<bx-seq:seq" . $';
4039
4040         # First line semicolon terminated but second not, usually ok:
4041         #               my $want = "'ab', 'a', 'b'";
4042         #               my $got  = join( ", ",
4043         #                    map { defined($_) ? "'$_'" : "undef" }
4044         #                          @got );
4045         #  First line not semicolon terminated, Not OK to match:
4046         #   $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
4047         #      $$href{-NUM_DIRS} = 0;
4048         my $pat0 = $rpatterns_0->[0];
4049         my $pat1 = $rpatterns_1->[0];
4050
4051         ##########################################################
4052         # Turn off the marginal flag for some types of assignments
4053         ##########################################################
4054         if ( $is_assignment{$raw_tokb} ) {
4055
4056             # undo marginal flag if first line is semicolon terminated
4057             # and leading patters match
4058             if ($sc_term0) {    # && $sc_term1) {
4059                 $is_marginal = $pat0 ne $pat1;
4060             }
4061         }
4062         elsif ( $raw_tokb eq '=>' ) {
4063
4064             # undo marginal flag if patterns match
4065             $is_marginal = $pat0 ne $pat1 || $line_ending_fat_comma;
4066         }
4067         elsif ( $raw_tokb eq '=~' ) {
4068
4069             # undo marginal flag if both lines are semicolon terminated
4070             # and leading patters match
4071             if ( $sc_term1 && $sc_term0 ) {
4072                 $is_marginal = $pat0 ne $pat1;
4073             }
4074         }
4075
4076         ######################################################
4077         # Turn off the marginal flag if we saw an 'if' or 'or'
4078         ######################################################
4079
4080         # A trailing 'if' and 'or' often gives a good alignment
4081         # For example, we can align these:
4082         #  return -1     if $_[0] =~ m/^CHAPT|APPENDIX/;
4083         #  return $1 + 0 if $_[0] =~ m/^SECT(\d*)$/;
4084
4085         # or
4086         #  $d_in_m[2] = 29          if ( &Date_LeapYear($y) );
4087         #  $d         = $d_in_m[$m] if ( $d > $d_in_m[$m] );
4088
4089         if ($saw_if_or) {
4090
4091             # undo marginal flag if both lines are semicolon terminated
4092             if ( $sc_term0 && $sc_term1 ) {
4093                 $is_marginal = 0;
4094             }
4095         }
4096
4097         # For a marginal match, only keep matches before the first 'bad' match
4098         if (   $is_marginal
4099             && defined($jfirst_bad)
4100             && $imax_align > $jfirst_bad - 1 )
4101         {
4102             $imax_align = $jfirst_bad - 1;
4103         }
4104
4105         ###########################################################
4106         # Allow sweep to match lines with leading '=' in some cases
4107         ###########################################################
4108         if ( $imax_align < 0 && defined($j0_eq_pad) ) {
4109
4110             if (
4111
4112                 # If there is a following line with leading equals, or
4113                 # preceding line with leading equals, then let the sweep align
4114                 # them without restriction.  For example, the first two lines
4115                 # here are a marginal match, but they are followed by a line
4116                 # with leading equals, so the sweep-lr logic can align all of
4117                 # the lines:
4118
4119                 #  $date[1] = $month_to_num{ $date[1] };            # <--line_0
4120                 #  @xdate   = split( /[:\/\s]/, $log->field('t') ); # <--line_1
4121                 #  $day     = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
4122                 #  $time    = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
4123
4124                 # Likewise, if we reverse the two pairs we want the same result
4125
4126                 #  $day     = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
4127                 #  $time    = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
4128                 #  $date[1] = $month_to_num{ $date[1] };            # <--line_0
4129                 #  @xdate   = split( /[:\/\s]/, $log->field('t') ); # <--line_1
4130
4131                 (
4132                        $imax_next >= 0
4133                     || $imax_prev >= 0
4134                     || TEST_MARGINAL_EQ_ALIGNMENT
4135                 )
4136                 && $j0_eq_pad >= -$j0_max_pad
4137                 && $j0_eq_pad <= $j0_max_pad
4138               )
4139             {
4140
4141                 # But do not do this if there is a comma before the '='.
4142                 # For example, the first two lines below have commas and
4143                 # therefore are not allowed to align with lines 3 & 4:
4144
4145                 # my ( $x, $y ) = $self->Size();                      #<--line_0
4146                 # my ( $left, $top, $right, $bottom ) = $self->Window(); #<--l_1
4147                 # my $vx = $right - $left;
4148                 # my $vy = $bottom - $top;
4149
4150                 if ( $rpatterns_0->[0] !~ /,/ && $rpatterns_1->[0] !~ /,/ ) {
4151                     $imax_align = 0;
4152                 }
4153             }
4154         }
4155
4156       RETURN:
4157         return ( $is_marginal, $imax_align );
4158     }
4159 }
4160
4161 sub get_extra_leading_spaces {
4162
4163     my ( $rlines, $rgroups ) = @_;
4164
4165     #----------------------------------------------------------
4166     # Define any extra indentation space (for the -lp option).
4167     # Here is why:
4168     # If a list has side comments, sub scan_list must dump the
4169     # list before it sees everything.  When this happens, it sets
4170     # the indentation to the standard scheme, but notes how
4171     # many spaces it would have liked to use.  We may be able
4172     # to recover that space here in the event that all of the
4173     # lines of a list are back together again.
4174     #----------------------------------------------------------
4175
4176     return 0 unless ( @{$rlines} && @{$rgroups} );
4177
4178     my $object = $rlines->[0]->get_indentation();
4179     return 0 unless ( ref($object) );
4180     my $extra_leading_spaces            = 0;
4181     my $extra_indentation_spaces_wanted = get_recoverable_spaces($object);
4182     return ($extra_leading_spaces) unless ($extra_indentation_spaces_wanted);
4183
4184     my $min_spaces = $extra_indentation_spaces_wanted;
4185     if ( $min_spaces > 0 ) { $min_spaces = 0 }
4186
4187     # loop over all groups
4188     my $ng      = -1;
4189     my $ngroups = @{$rgroups};
4190     foreach my $item ( @{$rgroups} ) {
4191         $ng++;
4192         my ( $jbeg, $jend ) = @{$item};
4193         foreach my $j ( $jbeg .. $jend ) {
4194             next if ( $j == 0 );
4195
4196             # all indentation objects must be the same
4197             if ( $object != $rlines->[$j]->get_indentation() ) {
4198                 return 0;
4199             }
4200         }
4201
4202        # find the maximum space without exceeding the line length for this group
4203         my $avail = $rlines->[$jbeg]->get_available_space_on_right();
4204         my $spaces =
4205           ( $avail > $extra_indentation_spaces_wanted )
4206           ? $extra_indentation_spaces_wanted
4207           : $avail;
4208
4209         #########################################################
4210         # Note: min spaces can be negative; for example with -gnu
4211         # f(
4212         #   do { 1; !!(my $x = bless []); }
4213         #  );
4214         #########################################################
4215         # The following rule is needed to match older formatting:
4216         # For multiple groups, we will keep spaces non-negative.
4217         # For a single group, we will allow a negative space.
4218         if ( $ngroups > 1 && $spaces < 0 ) { $spaces = 0 }
4219
4220         # update the minimum spacing
4221         if ( $ng == 0 || $spaces < $extra_leading_spaces ) {
4222             $extra_leading_spaces = $spaces;
4223         }
4224     }
4225
4226     # update the indentation object because with -icp the terminal
4227     # ');' will use the same adjustment.
4228     $object->permanently_decrease_available_spaces( -$extra_leading_spaces );
4229     return $extra_leading_spaces;
4230 }
4231
4232 sub forget_side_comment {
4233     my ($self) = @_;
4234     $self->[_last_side_comment_column_] = 0;
4235     return;
4236 }
4237
4238 sub is_good_side_comment_column {
4239     my ( $self, $line, $line_number, $level, $num5 ) = @_;
4240
4241     # Upon encountering the first side comment of a group, decide if
4242     # a previous side comment should be forgotten.  This involves
4243     # checking several rules.
4244
4245     # Return true to keep old comment location
4246     # Return false to forget old comment location
4247
4248     my $rfields                 = $line->get_rfields();
4249     my $is_hanging_side_comment = $line->get_is_hanging_side_comment();
4250
4251     # RULE1: Never forget comment before a hanging side comment
4252     goto KEEP if ($is_hanging_side_comment);
4253
4254     # RULE2: Forget a side comment after a short line difference,
4255     # where 'short line difference' is computed from a formula.
4256     # Using a smooth formula helps minimize sudden large changes.
4257     my $line_diff = $line_number - $self->[_last_side_comment_line_number_];
4258     my $alev_diff = abs( $level - $self->[_last_side_comment_level_] );
4259
4260     # '$num5' is the number of comments in the first 5 lines after the first
4261     # comment.  It is needed to keep a compact group of side comments from
4262     # being influenced by a more distant side comment.
4263     $num5 = 1 unless ($num5);
4264
4265     # Some values:
4266
4267     #        $adiff  $num5   $short_diff
4268     #        0       *       12
4269     #        1       1       6
4270     #        1       2       4
4271     #        1       3       3
4272     #        1       4       2
4273     #        2       1       4
4274     #        2       2       2
4275     #        2       3       1
4276     #        3       1       3
4277     #        3       2       1
4278
4279     my $short_diff = SC_LONG_LINE_DIFF / ( 1 + $alev_diff * $num5 );
4280
4281     goto FORGET
4282       if ( $line_diff > $short_diff );
4283
4284     # RULE3: Forget a side comment if this line is at lower level and
4285     # ends a block
4286     my $last_sc_level = $self->[_last_side_comment_level_];
4287     goto FORGET
4288       if ( $level < $last_sc_level
4289         && $is_closing_block_type{ substr( $rfields->[0], 0, 1 ) } );
4290
4291     # RULE 4: Forget the last side comment if this comment might join a cached
4292     # line ...
4293     if ( my $cached_line_type = get_cached_line_type() ) {
4294
4295         # ... otherwise side comment alignment will get messed up.
4296         # For example, in the following test script
4297         # with using 'perltidy -sct -act=2', the last comment would try to
4298         # align with the previous and then be in the wrong column when
4299         # the lines are combined:
4300
4301         # foreach $line (
4302         #    [0, 1, 2], [3, 4, 5], [6, 7, 8],    # rows
4303         #    [0, 3, 6], [1, 4, 7], [2, 5, 8],    # columns
4304         #    [0, 4, 8], [2, 4, 6]
4305         #  )                                     # diagonals
4306         goto FORGET
4307           if ( $cached_line_type == 2 || $cached_line_type == 4 );
4308     }
4309
4310     # Otherwise, keep it alive
4311     goto KEEP;
4312
4313   FORGET:
4314     return 0;
4315
4316   KEEP:
4317     return 1;
4318 }
4319
4320 sub align_side_comments {
4321
4322     my ( $self, $rlines, $rgroups ) = @_;
4323
4324     # Align any side comments in this batch of lines
4325
4326     # Given:
4327     #  $rlines  - the lines
4328     #  $rgroups - the partition of the lines into groups
4329     #
4330     # We will be working group-by-group because all side comments
4331     # (real or fake) in each group are already aligned. So we just have
4332     # to make alignments between groups wherever possible.
4333
4334     # An unusual aspect is that within each group we have aligned both real
4335     # and fake side comments.  This has the consequence that the lengths of
4336     # long lines without real side comments can cause 'push' all side comments
4337     # to the right.  This seems unusual, but testing with and without this
4338     # feature shows that it is usually better this way.  Othewise, side
4339     # comments can be hidden between long lines without side comments and
4340     # thus be harder to read.
4341
4342     my $group_level        = $self->[_group_level_];
4343     my $continuing_sc_flow = $self->[_last_side_comment_length_] > 0
4344       && $group_level == $self->[_last_level_written_];
4345
4346     # Find groups with side comments, and remember the first nonblank comment
4347     my $j_sc_beg;
4348     my @todo;
4349     my $ng = -1;
4350     foreach my $item ( @{$rgroups} ) {
4351         $ng++;
4352         my ( $jbeg, $jend ) = @{$item};
4353         foreach my $j ( $jbeg .. $jend ) {
4354             my $line = $rlines->[$j];
4355             my $jmax = $line->get_jmax();
4356             if ( $line->get_rfield_lengths()->[$jmax] ) {
4357
4358                 # this group has a line with a side comment
4359                 push @todo, $ng;
4360                 if ( !defined($j_sc_beg) ) {
4361                     $j_sc_beg = $j;
4362                 }
4363                 last;
4364             }
4365         }
4366     }
4367
4368     # done if no groups with side comments
4369     return unless @todo;
4370
4371     # Count $num5 = number of comments in the 5 lines after the first comment
4372     # This is an important factor in a decision formula
4373     my $num5 = 1;
4374     for ( my $jj = $j_sc_beg + 1 ; $jj < @{$rlines} ; $jj++ ) {
4375         my $ldiff = $jj - $j_sc_beg;
4376         last if ( $ldiff > 5 );
4377         my $line   = $rlines->[$jj];
4378         my $jmax   = $line->get_jmax();
4379         my $sc_len = $line->get_rfield_lengths()->[$jmax];
4380         next unless ($sc_len);
4381         $num5++;
4382     }
4383
4384     # Forget the old side comment location if necessary
4385     my $line = $rlines->[$j_sc_beg];
4386     my $lnum =
4387       $j_sc_beg + $self->[_file_writer_object_]->get_output_line_number();
4388     my $keep_it =
4389       $self->is_good_side_comment_column( $line, $lnum, $group_level, $num5 );
4390     my $last_side_comment_column =
4391       $keep_it ? $self->[_last_side_comment_column_] : 0;
4392
4393     # If there are multiple groups we will do two passes
4394     # so that we can find a common alignment for all groups.
4395     my $MAX_PASS = @todo > 1 ? 2 : 1;
4396
4397     # Loop over passes
4398     my $max_comment_column = $last_side_comment_column;
4399     for ( my $PASS = 1 ; $PASS <= $MAX_PASS ; $PASS++ ) {
4400
4401         # If there are two passes, then on the last pass make the old column
4402         # equal to the largest of the group.  This will result in the comments
4403         # being aligned if possible.
4404         if ( $PASS == $MAX_PASS ) {
4405             $last_side_comment_column = $max_comment_column;
4406         }
4407
4408         # Loop over the groups with side comments
4409         my $column_limit;
4410         foreach my $ng (@todo) {
4411             my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
4412
4413             # Note that since all lines in a group have common alignments, we
4414             # just have to work on one of the lines (the first line).
4415             my $line                    = $rlines->[$jbeg];
4416             my $jmax                    = $line->get_jmax();
4417             my $is_hanging_side_comment = $line->get_is_hanging_side_comment();
4418             last
4419               if ( $PASS < $MAX_PASS && $is_hanging_side_comment );
4420
4421             # the maximum space without exceeding the line length:
4422             my $avail = $line->get_available_space_on_right();
4423
4424             # try to use the previous comment column
4425             my $side_comment_column = $line->get_column( $jmax - 1 );
4426             my $move = $last_side_comment_column - $side_comment_column;
4427
4428             # Remember the maximum possible column of the first line with
4429             # side comment
4430             if ( !defined($column_limit) ) {
4431                 $column_limit = $side_comment_column + $avail;
4432             }
4433
4434             next if ( $jmax <= 0 );
4435
4436             # but if this doesn't work, give up and use the minimum space
4437             my $min_move = $self->[_rOpts_minimum_space_to_comment_] - 1;
4438             if ( $move > $avail ) {
4439                 $move = $min_move;
4440             }
4441
4442             # but we want some minimum space to the comment
4443             if (   $move >= 0
4444                 && $j_sc_beg == 0
4445                 && $continuing_sc_flow )
4446             {
4447                 $min_move = 0;
4448             }
4449
4450             # remove constraints on hanging side comments
4451             if ($is_hanging_side_comment) { $min_move = 0 }
4452
4453             if ( $move < $min_move ) {
4454                 $move = $min_move;
4455             }
4456
4457             # don't exceed the available space
4458             if ( $move > $avail ) { $move = $avail }
4459
4460             # We can only increase space, never decrease.
4461             if ( $move < 0 ) { $move = 0 }
4462
4463             # Discover the largest column on the preliminary  pass
4464             if ( $PASS < $MAX_PASS ) {
4465                 my $col = $line->get_column( $jmax - 1 ) + $move;
4466
4467                 # but ignore columns too large for the starting line
4468                 if ( $col > $max_comment_column && $col < $column_limit ) {
4469                     $max_comment_column = $col;
4470                 }
4471             }
4472
4473             # Make the changes on the final pass
4474             else {
4475                 $line->increase_field_width( $jmax - 1, $move );
4476
4477                 # remember this column for the next group
4478                 $last_side_comment_column = $line->get_column( $jmax - 1 );
4479             }
4480         } ## end loop over groups
4481     } ## end loop over passes
4482
4483     # Find the last side comment
4484     my $j_sc_last;
4485     my $ng_last = $todo[-1];
4486     my ( $jbeg, $jend ) = @{ $rgroups->[$ng_last] };
4487     for ( my $jj = $jend ; $jj >= $jbeg ; $jj-- ) {
4488         my $line = $rlines->[$jj];
4489         my $jmax = $line->get_jmax();
4490         if ( $line->get_rfield_lengths()->[$jmax] ) {
4491             $j_sc_last = $jj;
4492             last;
4493         }
4494     }
4495
4496     # Save final side comment info for possible use by the next batch
4497     if ( defined($j_sc_last) ) {
4498         my $line_number =
4499           $self->[_file_writer_object_]->get_output_line_number() + $j_sc_last;
4500         $self->[_last_side_comment_column_]      = $last_side_comment_column;
4501         $self->[_last_side_comment_line_number_] = $line_number;
4502         $self->[_last_side_comment_level_]       = $group_level;
4503     }
4504     return;
4505 }
4506
4507 ###############################
4508 # CODE SECTION 6: Output Step A
4509 ###############################
4510
4511 sub valign_output_step_A {
4512
4513     ###############################################################
4514     # This is Step A in writing vertically aligned lines.
4515     # The line is prepared according to the alignments which have
4516     # been found. Then it is shipped to the next step.
4517     ###############################################################
4518
4519     my ( $self, $rinput_hash ) = @_;
4520
4521     my $line                 = $rinput_hash->{line};
4522     my $min_ci_gap           = $rinput_hash->{min_ci_gap};
4523     my $do_not_align         = $rinput_hash->{do_not_align};
4524     my $group_leader_length  = $rinput_hash->{group_leader_length};
4525     my $extra_leading_spaces = $rinput_hash->{extra_leading_spaces};
4526     my $level                = $rinput_hash->{level};
4527
4528     my $rfields                   = $line->get_rfields();
4529     my $rfield_lengths            = $line->get_rfield_lengths();
4530     my $leading_space_count       = $line->get_leading_space_count();
4531     my $outdent_long_lines        = $line->get_outdent_long_lines();
4532     my $maximum_field_index       = $line->get_jmax();
4533     my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
4534     my $Kend                      = $line->get_Kend();
4535     my $level_end                 = $line->get_level_end();
4536
4537     # add any extra spaces
4538     if ( $leading_space_count > $group_leader_length ) {
4539         $leading_space_count += $min_ci_gap;
4540     }
4541
4542     my $str     = $rfields->[0];
4543     my $str_len = $rfield_lengths->[0];
4544
4545     # loop to concatenate all fields of this line and needed padding
4546     my $total_pad_count = 0;
4547     for my $j ( 1 .. $maximum_field_index ) {
4548
4549         # skip zero-length side comments
4550         last
4551           if (
4552             ( $j == $maximum_field_index )
4553             && ( !defined( $rfields->[$j] )
4554                 || ( $rfield_lengths->[$j] == 0 ) )
4555           );
4556
4557         # compute spaces of padding before this field
4558         my $col = $line->get_column( $j - 1 );
4559         my $pad = $col - ( $str_len + $leading_space_count );
4560
4561         if ($do_not_align) {
4562             $pad =
4563               ( $j < $maximum_field_index )
4564               ? 0
4565               : $self->[_rOpts_minimum_space_to_comment_] - 1;
4566         }
4567
4568         # if the -fpsc flag is set, move the side comment to the selected
4569         # column if and only if it is possible, ignoring constraints on
4570         # line length and minimum space to comment
4571         if (   $self->[_rOpts_fixed_position_side_comment_]
4572             && $j == $maximum_field_index )
4573         {
4574             my $newpad =
4575               $pad + $self->[_rOpts_fixed_position_side_comment_] - $col - 1;
4576             if ( $newpad >= 0 ) { $pad = $newpad; }
4577         }
4578
4579         # accumulate the padding
4580         if ( $pad > 0 ) { $total_pad_count += $pad; }
4581
4582         # only add padding when we have a finite field;
4583         # this avoids extra terminal spaces if we have empty fields
4584         if ( $rfield_lengths->[$j] > 0 ) {
4585             $str .= ' ' x $total_pad_count;
4586             $str_len += $total_pad_count;
4587             $total_pad_count = 0;
4588             $str .= $rfields->[$j];
4589             $str_len += $rfield_lengths->[$j];
4590         }
4591         else {
4592             $total_pad_count = 0;
4593         }
4594     }
4595
4596     my $side_comment_length = $rfield_lengths->[$maximum_field_index];
4597
4598     # ship this line off
4599     $self->valign_output_step_B(
4600         {
4601             leading_space_count => $leading_space_count + $extra_leading_spaces,
4602             line                => $str,
4603             line_length         => $str_len,
4604             side_comment_length => $side_comment_length,
4605             outdent_long_lines  => $outdent_long_lines,
4606             rvertical_tightness_flags => $rvertical_tightness_flags,
4607             level                     => $level,
4608             level_end                 => $level_end,
4609             Kend                      => $Kend,
4610         }
4611     );
4612     return;
4613 }
4614
4615 sub combine_fields {
4616
4617     # We have a group of two lines for which we do not want to align tokens
4618     # between index $imax_align and the side comment.  So we will delete fields
4619     # between $imax_align and the side comment.  Alignments have already
4620     # been set so we have to adjust them.
4621
4622     my ( $line_0, $line_1, $imax_align ) = @_;
4623
4624     if ( !defined($imax_align) ) { $imax_align = -1 }
4625
4626     # First delete the unwanted tokens
4627     my $jmax_old       = $line_0->get_jmax();
4628     my @old_alignments = $line_0->get_alignments();
4629     my @idel           = ( $imax_align + 1 .. $jmax_old - 2 );
4630
4631     return unless (@idel);
4632
4633     foreach my $line ( $line_0, $line_1 ) {
4634         delete_selected_tokens( $line, \@idel );
4635     }
4636
4637     # Now adjust the alignments.  Note that the side comment alignment
4638     # is always at jmax-1, and there is an ending alignment at jmax.
4639     my @new_alignments;
4640     if ( $imax_align >= 0 ) {
4641         @new_alignments[ 0 .. $imax_align ] =
4642           @old_alignments[ 0 .. $imax_align ];
4643     }
4644
4645     my $jmax_new = $line_0->get_jmax();
4646
4647     $new_alignments[ $jmax_new - 1 ] = $old_alignments[ $jmax_old - 1 ];
4648     $new_alignments[$jmax_new] = $old_alignments[$jmax_old];
4649     $line_0->set_alignments(@new_alignments);
4650     $line_1->set_alignments(@new_alignments);
4651     return;
4652 }
4653
4654 sub get_output_line_number {
4655
4656     # The output line number reported to a caller =
4657     # the number of items still in the buffer +
4658     # the number of items written.
4659     return $_[0]->group_line_count() +
4660       $_[0]->[_file_writer_object_]->get_output_line_number();
4661 }
4662
4663 ###############################
4664 # CODE SECTION 7: Output Step B
4665 ###############################
4666
4667 {    ## closure for sub valign_output_step_B
4668
4669     # These are values for a cache used by valign_output_step_B.
4670     my $cached_line_text;
4671     my $cached_line_text_length;
4672     my $cached_line_type;
4673     my $cached_line_flag;
4674     my $cached_seqno;
4675     my $cached_line_valid;
4676     my $cached_line_leading_space_count;
4677     my $cached_seqno_string;
4678     my $cached_line_Kend;
4679     my $seqno_string;
4680     my $last_nonblank_seqno_string;
4681
4682     sub get_seqno_string {
4683         return $seqno_string;
4684     }
4685
4686     sub get_last_nonblank_seqno_string {
4687         return $last_nonblank_seqno_string;
4688     }
4689
4690     sub set_last_nonblank_seqno_string {
4691         my ($val) = @_;
4692         $last_nonblank_seqno_string = $val;
4693         return;
4694     }
4695
4696     sub get_cached_line_flag {
4697         return $cached_line_flag;
4698     }
4699
4700     sub get_cached_line_type {
4701         return $cached_line_type;
4702     }
4703
4704     sub set_cached_line_valid {
4705         my ($val) = @_;
4706         $cached_line_valid = $val;
4707         return;
4708     }
4709
4710     sub get_cached_seqno {
4711         return $cached_seqno;
4712     }
4713
4714     sub initialize_step_B_cache {
4715
4716         # valign_output_step_B cache:
4717         $cached_line_text                = "";
4718         $cached_line_text_length         = 0;
4719         $cached_line_type                = 0;
4720         $cached_line_flag                = 0;
4721         $cached_seqno                    = 0;
4722         $cached_line_valid               = 0;
4723         $cached_line_leading_space_count = 0;
4724         $cached_seqno_string             = "";
4725         $cached_line_Kend                = undef;
4726
4727         # These vars hold a string of sequence numbers joined together used by
4728         # the cache
4729         $seqno_string               = "";
4730         $last_nonblank_seqno_string = "";
4731         return;
4732     }
4733
4734     sub _flush_cache {
4735         my ($self) = @_;
4736         if ($cached_line_type) {
4737             $seqno_string = $cached_seqno_string;
4738             $self->valign_output_step_C(
4739                 $cached_line_text,
4740                 $cached_line_leading_space_count,
4741                 $self->[_last_level_written_],
4742                 $cached_line_Kend,
4743             );
4744             $cached_line_type        = 0;
4745             $cached_line_text        = "";
4746             $cached_line_text_length = 0;
4747             $cached_seqno_string     = "";
4748             $cached_line_Kend        = undef;
4749         }
4750         return;
4751     }
4752
4753     sub valign_output_step_B {
4754
4755         ###############################################################
4756         # This is Step B in writing vertically aligned lines.
4757         # Vertical tightness is applied according to preset flags.
4758         # In particular this routine handles stacking of opening
4759         # and closing tokens.
4760         ###############################################################
4761
4762         my ( $self, $rinput ) = @_;
4763
4764         my $leading_space_count       = $rinput->{leading_space_count};
4765         my $str                       = $rinput->{line};
4766         my $str_length                = $rinput->{line_length};
4767         my $side_comment_length       = $rinput->{side_comment_length};
4768         my $outdent_long_lines        = $rinput->{outdent_long_lines};
4769         my $rvertical_tightness_flags = $rinput->{rvertical_tightness_flags};
4770         my $level                     = $rinput->{level};
4771         my $level_end                 = $rinput->{level_end};
4772         my $Kend                      = $rinput->{Kend};
4773
4774         my $last_level_written = $self->[_last_level_written_];
4775
4776         # Useful -gcs test cases for wide characters are
4777         # perl527/(method.t.2, reg_mesg.t, mime-header.t)
4778
4779         # handle outdenting of long lines:
4780         my $is_outdented_line;
4781         if ($outdent_long_lines) {
4782             my $excess =
4783               $str_length -
4784               $side_comment_length +
4785               $leading_space_count -
4786               $self->maximum_line_length_for_level($level);
4787             if ( $excess > 0 ) {
4788                 $leading_space_count = 0;
4789                 my $file_writer_object = $self->[_file_writer_object_];
4790                 my $last_outdented_line_at =
4791                   $file_writer_object->get_output_line_number();
4792                 $self->[_last_outdented_line_at_] = $last_outdented_line_at;
4793
4794                 my $outdented_line_count = $self->[_outdented_line_count_];
4795                 unless ($outdented_line_count) {
4796                     $self->[_first_outdented_line_at_] =
4797                       $last_outdented_line_at;
4798                 }
4799                 $outdented_line_count++;
4800                 $self->[_outdented_line_count_] = $outdented_line_count;
4801                 $is_outdented_line = 1;
4802             }
4803         }
4804
4805         # Make preliminary leading whitespace.  It could get changed
4806         # later by entabbing, so we have to keep track of any changes
4807         # to the leading_space_count from here on.
4808         my $leading_string =
4809           $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
4810         my $leading_string_length = length($leading_string);
4811
4812         # Unpack any recombination data; it was packed by
4813         # sub send_lines_to_vertical_aligner. Contents:
4814         #
4815         #   [0] type: 1=opening non-block    2=closing non-block
4816         #             3=opening block brace  4=closing block brace
4817         #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
4818         #             if closing: spaces of padding to use
4819         #   [2] sequence number of container
4820         #   [3] valid flag: do not append if this flag is false
4821         #
4822         my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
4823             $seqno_end );
4824         if ($rvertical_tightness_flags) {
4825             (
4826                 $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
4827                 $seqno_end
4828             ) = @{$rvertical_tightness_flags};
4829         }
4830
4831         $seqno_string = $seqno_end;
4832
4833         # handle any cached line ..
4834         # either append this line to it or write it out
4835         # Note: the function length() is used in this next test out of caution.
4836         # All testing has shown that the variable $cached_line_text_length is
4837         # correct, but its calculation is complex and a loss of cached text
4838         # would be a disaster.
4839         if ( length($cached_line_text) ) {
4840
4841             # Dump an invalid cached line
4842             if ( !$cached_line_valid ) {
4843                 $self->valign_output_step_C(
4844                     $cached_line_text,   $cached_line_leading_space_count,
4845                     $last_level_written, $cached_line_Kend
4846                 );
4847             }
4848
4849             # Handle cached line ending in OPENING tokens
4850             elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
4851
4852                 my $gap = $leading_space_count - $cached_line_text_length;
4853
4854                 # handle option of just one tight opening per line:
4855                 if ( $cached_line_flag == 1 ) {
4856                     if ( defined($open_or_close) && $open_or_close == 1 ) {
4857                         $gap = -1;
4858                     }
4859                 }
4860
4861                 # Do not join the lines if this might produce a one-line
4862                 # container which exceeds the maximum line length.  This is
4863                 # necessary prevent blinking, particularly with the combination
4864                 # -xci -pvt=2.  In that case a one-line block alternately forms
4865                 # and breaks, causing -xci to alternately turn on and off (case
4866                 # b765).
4867                 # Patched to fix cases b656 b862 b971 b972: always do the check
4868                 # if -vmll is set.  The reason is that the -vmll option can
4869                 # cause changes in the maximum line length, leading to blinkers
4870                 # if not checked.
4871                 if (
4872                     $gap >= 0
4873                     && ( $self->[_rOpts_variable_maximum_line_length_]
4874                         || ( defined($level_end) && $level > $level_end ) )
4875                   )
4876                 {
4877                     my $test_line_length =
4878                       $cached_line_text_length + $gap + $str_length;
4879                     my $maximum_line_length =
4880                       $self->maximum_line_length_for_level($last_level_written);
4881
4882                     # Add a small tolerance in the length test (fixes case b862)
4883                     if ( $test_line_length > $maximum_line_length - 2 ) {
4884                         $gap = -1;
4885                     }
4886                 }
4887
4888                 if ( $gap >= 0 && defined($seqno_beg) ) {
4889                     $leading_string        = $cached_line_text . ' ' x $gap;
4890                     $leading_string_length = $cached_line_text_length + $gap;
4891                     $leading_space_count   = $cached_line_leading_space_count;
4892                     $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
4893                     $level        = $last_level_written;
4894                 }
4895                 else {
4896                     $self->valign_output_step_C(
4897                         $cached_line_text,   $cached_line_leading_space_count,
4898                         $last_level_written, $cached_line_Kend
4899                     );
4900                 }
4901             }
4902
4903             # Handle cached line ending in CLOSING tokens
4904             else {
4905                 my $test_line =
4906                   $cached_line_text . ' ' x $cached_line_flag . $str;
4907                 my $test_line_length =
4908                   $cached_line_text_length + $cached_line_flag + $str_length;
4909                 if (
4910
4911                     # The new line must start with container
4912                     $seqno_beg
4913
4914                     # The container combination must be okay..
4915                     && (
4916
4917                         # okay to combine like types
4918                         ( $open_or_close == $cached_line_type )
4919
4920                         # closing block brace may append to non-block
4921                         || ( $cached_line_type == 2 && $open_or_close == 4 )
4922
4923                         # something like ');'
4924                         || ( !$open_or_close && $cached_line_type == 2 )
4925
4926                     )
4927
4928                     # The combined line must fit
4929                     && (
4930                         $test_line_length <=
4931                         $self->maximum_line_length_for_level(
4932                             $last_level_written)
4933                     )
4934                   )
4935                 {
4936
4937                     $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
4938
4939                     # Patch to outdent closing tokens ending # in ');' If we
4940                     # are joining a line like ');' to a previous stacked set of
4941                     # closing tokens, then decide if we may outdent the
4942                     # combined stack to the indentation of the ');'.  Since we
4943                     # should not normally outdent any of the other tokens more
4944                     # than the indentation of the lines that contained them, we
4945                     # will only do this if all of the corresponding opening
4946                     # tokens were on the same line.  This can happen with -sot
4947                     # and -sct.
4948
4949                     # For example, it is ok here:
4950                     #   __PACKAGE__->load_components( qw(
4951                     #         PK::Auto
4952                     #         Core
4953                     #   ));
4954                     #
4955                     # But, for example, we do not outdent in this example
4956                     # because that would put the closing sub brace out farther
4957                     # than the opening sub brace:
4958                     #
4959                     #   perltidy -sot -sct
4960                     #   $c->Tk::bind(
4961                     #       '<Control-f>' => sub {
4962                     #           my ($c) = @_;
4963                     #           my $e = $c->XEvent;
4964                     #           itemsUnderArea $c;
4965                     #       } );
4966                     #
4967                     if (   $str =~ /^\);/
4968                         && $cached_line_text =~ /^[\)\}\]\s]*$/ )
4969                     {
4970
4971                         # The way to tell this is if the stacked sequence
4972                         # numbers of this output line are the reverse of the
4973                         # stacked sequence numbers of the previous non-blank
4974                         # line of sequence numbers.  So we can join if the
4975                         # previous nonblank string of tokens is the mirror
4976                         # image.  For example if stack )}] is 13:8:6 then we
4977                         # are looking for a leading stack like [{( which
4978                         # is 6:8:13. We only need to check the two ends,
4979                         # because the intermediate tokens must fall in order.
4980                         # Note on speed: having to split on colons and
4981                         # eliminate multiple colons might appear to be slow,
4982                         # but it's not an issue because we almost never come
4983                         # through here.  In a typical file we don't.
4984
4985                         $seqno_string               =~ s/^:+//;
4986                         $last_nonblank_seqno_string =~ s/^:+//;
4987                         $seqno_string               =~ s/:+/:/g;
4988                         $last_nonblank_seqno_string =~ s/:+/:/g;
4989
4990                         # how many spaces can we outdent?
4991                         my $diff =
4992                           $cached_line_leading_space_count -
4993                           $leading_space_count;
4994                         if (   $diff > 0
4995                             && length($seqno_string)
4996                             && length($last_nonblank_seqno_string) ==
4997                             length($seqno_string) )
4998                         {
4999                             my @seqno_last =
5000                               ( split /:/, $last_nonblank_seqno_string );
5001                             my @seqno_now = ( split /:/, $seqno_string );
5002                             if (   @seqno_now
5003                                 && @seqno_last
5004                                 && $seqno_now[-1] == $seqno_last[0]
5005                                 && $seqno_now[0] == $seqno_last[-1] )
5006                             {
5007
5008                                 # OK to outdent ..
5009                                 # for absolute safety, be sure we only remove
5010                                 # whitespace
5011                                 my $ws = substr( $test_line, 0, $diff );
5012                                 if ( ( length($ws) == $diff )
5013                                     && $ws =~ /^\s+$/ )
5014                                 {
5015
5016                                     $test_line = substr( $test_line, $diff );
5017                                     $cached_line_leading_space_count -= $diff;
5018                                     $last_level_written =
5019                                       $self->level_change(
5020                                         $cached_line_leading_space_count,
5021                                         $diff, $last_level_written );
5022                                     $self->reduce_valign_buffer_indentation(
5023                                         $diff);
5024                                 }
5025
5026                                 # shouldn't happen, but not critical:
5027                                 ##else {
5028                                 ## ERROR transferring indentation here
5029                                 ##}
5030                             }
5031                         }
5032                     }
5033
5034                     $str                   = $test_line;
5035                     $str_length            = $test_line_length;
5036                     $leading_string        = "";
5037                     $leading_string_length = 0;
5038                     $leading_space_count   = $cached_line_leading_space_count;
5039                     $level                 = $last_level_written;
5040                 }
5041                 else {
5042                     $self->valign_output_step_C(
5043                         $cached_line_text,   $cached_line_leading_space_count,
5044                         $last_level_written, $cached_line_Kend
5045                     );
5046                 }
5047             }
5048         }
5049         $cached_line_type        = 0;
5050         $cached_line_text        = "";
5051         $cached_line_text_length = 0;
5052         $cached_line_Kend        = undef;
5053
5054         # make the line to be written
5055         my $line        = $leading_string . $str;
5056         my $line_length = $leading_string_length + $str_length;
5057
5058         # Safety check: be sure that a line to be cached as a stacked block
5059         # brace line ends in the appropriate opening or closing block brace.
5060         # This should always be the case if the caller set flags correctly.
5061         # Code '3' is for -sobb, code '4' is for -scbb.
5062         if ($open_or_close) {
5063             if (   $open_or_close == 3 && $line !~ /\{\s*$/
5064                 || $open_or_close == 4 && $line !~ /\}\s*$/ )
5065             {
5066                 $open_or_close = 0;
5067             }
5068         }
5069
5070         # write or cache this line ...
5071         # fix for case b999: do not cache an outdented line
5072         if ( !$open_or_close || $side_comment_length > 0 || $is_outdented_line )
5073         {
5074             $self->valign_output_step_C( $line, $leading_space_count, $level,
5075                 $Kend );
5076         }
5077         else {
5078             $cached_line_text                = $line;
5079             $cached_line_text_length         = $line_length;
5080             $cached_line_type                = $open_or_close;
5081             $cached_line_flag                = $tightness_flag;
5082             $cached_seqno                    = $seqno;
5083             $cached_line_valid               = $valid;
5084             $cached_line_leading_space_count = $leading_space_count;
5085             $cached_seqno_string             = $seqno_string;
5086             $cached_line_Kend                = $Kend;
5087         }
5088
5089         $self->[_last_level_written_]       = $level;
5090         $self->[_last_side_comment_length_] = $side_comment_length;
5091         return;
5092     }
5093 }
5094
5095 ###############################
5096 # CODE SECTION 8: Output Step C
5097 ###############################
5098
5099 {    ## closure for sub valign_output_step_C
5100
5101     # Vertical alignment buffer used by valign_output_step_C
5102     my $valign_buffer_filling;
5103     my @valign_buffer;
5104
5105     sub initialize_valign_buffer {
5106         @valign_buffer         = ();
5107         $valign_buffer_filling = "";
5108         return;
5109     }
5110
5111     sub dump_valign_buffer {
5112         my ($self) = @_;
5113         if (@valign_buffer) {
5114             foreach (@valign_buffer) {
5115                 $self->valign_output_step_D( @{$_} );
5116             }
5117             @valign_buffer = ();
5118         }
5119         $valign_buffer_filling = "";
5120         return;
5121     }
5122
5123     sub reduce_valign_buffer_indentation {
5124
5125         my ( $self, $diff ) = @_;
5126         if ( $valign_buffer_filling && $diff ) {
5127             my $max_valign_buffer = @valign_buffer;
5128             foreach my $i ( 0 .. $max_valign_buffer - 1 ) {
5129                 my ( $line, $leading_space_count, $level, $Kend ) =
5130                   @{ $valign_buffer[$i] };
5131                 my $ws = substr( $line, 0, $diff );
5132                 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
5133                     $line = substr( $line, $diff );
5134                 }
5135                 if ( $leading_space_count >= $diff ) {
5136                     $leading_space_count -= $diff;
5137                     $level =
5138                       $self->level_change( $leading_space_count, $diff,
5139                         $level );
5140                 }
5141                 $valign_buffer[$i] =
5142                   [ $line, $leading_space_count, $level, $Kend ];
5143             }
5144         }
5145         return;
5146     }
5147
5148     sub valign_output_step_C {
5149
5150         ###############################################################
5151         # This is Step C in writing vertically aligned lines.
5152         # Lines are either stored in a buffer or passed along to the next step.
5153         # The reason for storing lines is that we may later want to reduce their
5154         # indentation when -sot and -sct are both used.
5155         ###############################################################
5156         my ( $self, @args ) = @_;
5157
5158         my $seqno_string               = get_seqno_string();
5159         my $last_nonblank_seqno_string = get_last_nonblank_seqno_string();
5160
5161         # Dump any saved lines if we see a line with an unbalanced opening or
5162         # closing token.
5163         $self->dump_valign_buffer()
5164           if ( $seqno_string && $valign_buffer_filling );
5165
5166         # Either store or write this line
5167         if ($valign_buffer_filling) {
5168             push @valign_buffer, [@args];
5169         }
5170         else {
5171             $self->valign_output_step_D(@args);
5172         }
5173
5174         # For lines starting or ending with opening or closing tokens..
5175         if ($seqno_string) {
5176             $last_nonblank_seqno_string = $seqno_string;
5177             set_last_nonblank_seqno_string($seqno_string);
5178
5179             # Start storing lines when we see a line with multiple stacked
5180             # opening tokens.
5181             # patch for RT #94354, requested by Colin Williams
5182             if (   $seqno_string =~ /^\d+(\:+\d+)+$/
5183                 && $args[0] !~ /^[\}\)\]\:\?]/ )
5184             {
5185
5186                 # This test is efficient but a little subtle: The first test
5187                 # says that we have multiple sequence numbers and hence
5188                 # multiple opening or closing tokens in this line.  The second
5189                 # part of the test rejects stacked closing and ternary tokens.
5190                 # So if we get here then we should have stacked unbalanced
5191                 # opening tokens.
5192
5193                 # Here is a complex example:
5194
5195                 # Foo($Bar[0], {  # (side comment)
5196                 #       baz => 1,
5197                 # });
5198
5199                 # The first line has sequence 6::4.  It does not begin with
5200                 # a closing token or ternary, so it passes the test and must be
5201                 # stacked opening tokens.
5202
5203                 # The last line has sequence 4:6 but is a stack of closing
5204                 # tokens, so it gets rejected.
5205
5206                 # Note that the sequence number of an opening token for a qw
5207                 # quote is a negative number and will be rejected.  For
5208                 # example, for the following line: skip_symbols([qw(
5209                 # $seqno_string='10:5:-1'.  It would be okay to accept it but I
5210                 # decided not to do this after testing.
5211
5212                 $valign_buffer_filling = $seqno_string;
5213
5214             }
5215         }
5216         return;
5217     }
5218 }
5219
5220 ###############################
5221 # CODE SECTION 9: Output Step D
5222 ###############################
5223
5224 sub valign_output_step_D {
5225
5226     ###############################################################
5227     # This is Step D in writing vertically aligned lines.
5228     # It is the end of the vertical alignment pipeline.
5229     # Write one vertically aligned line of code to the output object.
5230     ###############################################################
5231
5232     my ( $self, $line, $leading_space_count, $level, $Kend ) = @_;
5233
5234     # The line is currently correct if there is no tabbing (recommended!)
5235     # We may have to lop off some leading spaces and replace with tabs.
5236     if ( $leading_space_count > 0 ) {
5237
5238         my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
5239         my $rOpts_tabs           = $self->[_rOpts_tabs_];
5240         my $rOpts_entab_leading_whitespace =
5241           $self->[_rOpts_entab_leading_whitespace_];
5242
5243         # Nothing to do if no tabs
5244         if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
5245             || $rOpts_indent_columns <= 0 )
5246         {
5247
5248             # nothing to do
5249         }
5250
5251         # Handle entab option
5252         elsif ($rOpts_entab_leading_whitespace) {
5253
5254             # Patch 12-nov-2018 based on report from Glenn. Extra padding was
5255             # not correctly entabbed, nor were side comments: Increase leading
5256             # space count for a padded line to get correct tabbing
5257             if ( $line =~ /^(\s+)(.*)$/ ) {
5258                 my $spaces = length($1);
5259                 if ( $spaces > $leading_space_count ) {
5260                     $leading_space_count = $spaces;
5261                 }
5262             }
5263
5264             my $space_count =
5265               $leading_space_count % $rOpts_entab_leading_whitespace;
5266             my $tab_count =
5267               int( $leading_space_count / $rOpts_entab_leading_whitespace );
5268             my $leading_string = "\t" x $tab_count . ' ' x $space_count;
5269             if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
5270                 substr( $line, 0, $leading_space_count ) = $leading_string;
5271             }
5272             else {
5273
5274                 # shouldn't happen - program error counting whitespace
5275                 # - skip entabbing
5276                 DEBUG_TABS
5277                   && $self->warning(
5278 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
5279                   );
5280             }
5281         }
5282
5283         # Handle option of one tab per level
5284         else {
5285             my $leading_string = ( "\t" x $level );
5286             my $space_count =
5287               $leading_space_count - $level * $rOpts_indent_columns;
5288
5289             # shouldn't happen:
5290             if ( $space_count < 0 ) {
5291
5292                 # But it could be an outdented comment
5293                 if ( $line !~ /^\s*#/ ) {
5294                     DEBUG_TABS
5295                       && $self->warning(
5296 "Error entabbing in valign_output_step_D: for level=$level count=$leading_space_count\n"
5297                       );
5298                 }
5299                 $leading_string = ( ' ' x $leading_space_count );
5300             }
5301             else {
5302                 $leading_string .= ( ' ' x $space_count );
5303             }
5304             if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
5305                 substr( $line, 0, $leading_space_count ) = $leading_string;
5306             }
5307             else {
5308
5309                 # shouldn't happen - program error counting whitespace
5310                 # we'll skip entabbing
5311                 DEBUG_TABS
5312                   && $self->warning(
5313 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
5314                   );
5315             }
5316         }
5317     }
5318     my $file_writer_object = $self->[_file_writer_object_];
5319     $file_writer_object->write_code_line( $line . "\n", $Kend );
5320
5321     return;
5322 }
5323
5324 {    ## closure for sub get_leading_string
5325
5326     my @leading_string_cache;
5327
5328     sub initialize_leading_string_cache {
5329         @leading_string_cache = ();
5330         return;
5331     }
5332
5333     sub get_leading_string {
5334
5335         # define the leading whitespace string for this line..
5336         my ( $self, $leading_whitespace_count, $group_level ) = @_;
5337
5338         # Handle case of zero whitespace, which includes multi-line quotes
5339         # (which may have a finite level; this prevents tab problems)
5340         if ( $leading_whitespace_count <= 0 ) {
5341             return "";
5342         }
5343
5344         # look for previous result
5345         elsif ( $leading_string_cache[$leading_whitespace_count] ) {
5346             return $leading_string_cache[$leading_whitespace_count];
5347         }
5348
5349         # must compute a string for this number of spaces
5350         my $leading_string;
5351
5352         # Handle simple case of no tabs
5353         my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
5354         my $rOpts_tabs           = $self->[_rOpts_tabs_];
5355         my $rOpts_entab_leading_whitespace =
5356           $self->[_rOpts_entab_leading_whitespace_];
5357
5358         if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
5359             || $rOpts_indent_columns <= 0 )
5360         {
5361             $leading_string = ( ' ' x $leading_whitespace_count );
5362         }
5363
5364         # Handle entab option
5365         elsif ($rOpts_entab_leading_whitespace) {
5366             my $space_count =
5367               $leading_whitespace_count % $rOpts_entab_leading_whitespace;
5368             my $tab_count = int(
5369                 $leading_whitespace_count / $rOpts_entab_leading_whitespace );
5370             $leading_string = "\t" x $tab_count . ' ' x $space_count;
5371         }
5372
5373         # Handle option of one tab per level
5374         else {
5375             $leading_string = ( "\t" x $group_level );
5376             my $space_count =
5377               $leading_whitespace_count - $group_level * $rOpts_indent_columns;
5378
5379             # shouldn't happen:
5380             if ( $space_count < 0 ) {
5381                 DEBUG_TABS
5382                   && $self->warning(
5383 "Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n"
5384                   );
5385
5386                 # -- skip entabbing
5387                 $leading_string = ( ' ' x $leading_whitespace_count );
5388             }
5389             else {
5390                 $leading_string .= ( ' ' x $space_count );
5391             }
5392         }
5393         $leading_string_cache[$leading_whitespace_count] = $leading_string;
5394         return $leading_string;
5395     }
5396 }    # end get_leading_string
5397
5398 ##########################
5399 # CODE SECTION 10: Summary
5400 ##########################
5401
5402 sub report_anything_unusual {
5403     my $self = shift;
5404
5405     my $outdented_line_count = $self->[_outdented_line_count_];
5406     if ( $outdented_line_count > 0 ) {
5407         $self->write_logfile_entry(
5408             "$outdented_line_count long lines were outdented:\n");
5409         my $first_outdented_line_at = $self->[_first_outdented_line_at_];
5410         $self->write_logfile_entry(
5411             "  First at output line $first_outdented_line_at\n");
5412
5413         if ( $outdented_line_count > 1 ) {
5414             my $last_outdented_line_at = $self->[_last_outdented_line_at_];
5415             $self->write_logfile_entry(
5416                 "   Last at output line $last_outdented_line_at\n");
5417         }
5418         $self->write_logfile_entry(
5419             "  use -noll to prevent outdenting, -l=n to increase line length\n"
5420         );
5421         $self->write_logfile_entry("\n");
5422     }
5423     return;
5424 }
5425 1;