lib/Perl/Tidy/Formatter.pm

   1 #####################################################################
   2 #
   3 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
   4 # line breaks to the token stream
   5 #
   6 #####################################################################
   7
   8 # Index...
   9 # CODE SECTION 1: Preliminary code, global definitions and sub new
  10 #                 sub new
  11 # CODE SECTION 2: Some Basic Utilities
  12 # CODE SECTION 3: Check and process options
  13 #                 sub check_options
  14 # CODE SECTION 4: Receive lines from the tokenizer
  15 #                 sub write_line
  16 # CODE SECTION 5: Pre-process the entire file
  17 #                 sub finish_formatting
  18 # CODE SECTION 6: Process line-by-line
  19 #                 sub process_all_lines
  20 # CODE SECTION 7: Process lines of code
  21 #                 process_line_of_CODE
  22 # CODE SECTION 8: Utilities for setting breakpoints
  23 #                 sub set_forced_breakpoint
  24 # CODE SECTION 9: Process batches of code
  25 #                 sub grind_batch_of_CODE
  26 # CODE SECTION 10: Code to break long statements
  27 #                  sub break_long_lines
  28 # CODE SECTION 11: Code to break long lists
  29 #                  sub break_lists
  30 # CODE SECTION 12: Code for setting indentation
  31 # CODE SECTION 13: Preparing batch of lines for vertical alignment
  32 #                  sub convey_batch_to_vertical_aligner
  33 # CODE SECTION 14: Code for creating closing side comments
  34 #                  sub add_closing_side_comment
  35 # CODE SECTION 15: Summarize
  36 #                  sub wrapup
  37
  38 #######################################################################
  39 # CODE SECTION 1: Preliminary code and global definitions up to sub new
  40 #######################################################################
  41
  42 package Perl::Tidy::Formatter;
  43 use strict;
  44 use warnings;
  45
  46 # DEVEL_MODE gets switched on during automated testing for extra checking
  47 use constant DEVEL_MODE   => 0;
  48 use constant EMPTY_STRING => q{};
  49 use constant SPACE        => q{ };
  50
  51 { #<<< A non-indenting brace to contain all lexical variables
  52
  53 use Carp;
  54 use English qw( -no_match_vars );
  55 our $VERSION = '20220613';
  56
  57 # The Tokenizer will be loaded with the Formatter
  58 ##use Perl::Tidy::Tokenizer;    # for is_keyword()
  59
  60 sub AUTOLOAD {
  61
  62     # Catch any undefined sub calls so that we are sure to get
  63     # some diagnostic information.  This sub should never be called
  64     # except for a programming error.
  65     our $AUTOLOAD;
  66     return if ( $AUTOLOAD =~ /\bDESTROY$/ );
  67     my ( $pkg, $fname, $lno ) = caller();
  68     my $my_package = __PACKAGE__;
  69     print STDERR <<EOM;
  70 ======================================================================
  71 Error detected in package '$my_package', version $VERSION
  72 Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
  73 Called from package: '$pkg'
  74 Called from File '$fname'  at line '$lno'
  75 This error is probably due to a recent programming change
  76 ======================================================================
  77 EOM
  78     exit 1;
  79 } ## end sub AUTOLOAD
  80
  81 sub DESTROY {
  82     my $self = shift;
  83     $self->_decrement_count();
  84     return;
  85 }
  86
  87 sub Die {
  88     my ($msg) = @_;
  89     Perl::Tidy::Die($msg);
  90     croak "unexpected return from Perl::Tidy::Die";
  91 }
  92
  93 sub Warn {
  94     my ($msg) = @_;
  95     Perl::Tidy::Warn($msg);
  96     return;
  97 }
  98
  99 sub Fault {
 100     my ($msg) = @_;
 101
 102     # This routine is called for errors that really should not occur
 103     # except if there has been a bug introduced by a recent program change.
 104     # Please add comments at calls to Fault to explain why the call
 105     # should not occur, and where to look to fix it.
 106     my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
 107     my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
 108     my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
 109     my $input_stream_name = get_input_stream_name();
 110
 111     Die(<<EOM);
 112 ==============================================================================
 113 While operating on input stream with name: '$input_stream_name'
 114 A fault was detected at line $line0 of sub '$subroutine1'
 115 in file '$filename1'
 116 which was called from line $line1 of sub '$subroutine2'
 117 Message: '$msg'
 118 This is probably an error introduced by a recent programming change.
 119 Perl::Tidy::Formatter.pm reports VERSION='$VERSION'.
 120 ==============================================================================
 121 EOM
 122
 123     # We shouldn't get here, but this return is to keep Perl-Critic from
 124     # complaining.
 125     return;
 126 } ## end sub Fault
 127
 128 sub Exit {
 129     my ($msg) = @_;
 130     Perl::Tidy::Exit($msg);
 131     croak "unexpected return from Perl::Tidy::Exit";
 132 }
 133
 134 # Global variables ...
 135 my (
 136
 137     #-----------------------------------------------------------------
 138     # Section 1: Global variables which are either always constant or
 139     # are constant after being configured by user-supplied
 140     # parameters.  They remain constant as a file is being processed.
 141     #-----------------------------------------------------------------
 142
 143     # user parameters and shortcuts
 144     $rOpts,
 145     $rOpts_add_newlines,
 146     $rOpts_add_whitespace,
 147     $rOpts_blank_lines_after_opening_block,
 148     $rOpts_block_brace_tightness,
 149     $rOpts_block_brace_vertical_tightness,
 150     $rOpts_break_after_labels,
 151     $rOpts_break_at_old_attribute_breakpoints,
 152     $rOpts_break_at_old_comma_breakpoints,
 153     $rOpts_break_at_old_keyword_breakpoints,
 154     $rOpts_break_at_old_logical_breakpoints,
 155     $rOpts_break_at_old_semicolon_breakpoints,
 156     $rOpts_break_at_old_ternary_breakpoints,
 157     $rOpts_break_open_compact_parens,
 158     $rOpts_closing_side_comments,
 159     $rOpts_closing_side_comment_else_flag,
 160     $rOpts_closing_side_comment_maximum_text,
 161     $rOpts_comma_arrow_breakpoints,
 162     $rOpts_continuation_indentation,
 163     $rOpts_delete_closing_side_comments,
 164     $rOpts_delete_old_whitespace,
 165     $rOpts_delete_side_comments,
 166     $rOpts_extended_continuation_indentation,
 167     $rOpts_format_skipping,
 168     $rOpts_freeze_whitespace,
 169     $rOpts_function_paren_vertical_alignment,
 170     $rOpts_fuzzy_line_length,
 171     $rOpts_ignore_old_breakpoints,
 172     $rOpts_ignore_side_comment_lengths,
 173     $rOpts_indent_closing_brace,
 174     $rOpts_indent_columns,
 175     $rOpts_indent_only,
 176     $rOpts_keep_interior_semicolons,
 177     $rOpts_line_up_parentheses,
 178     $rOpts_logical_padding,
 179     $rOpts_maximum_consecutive_blank_lines,
 180     $rOpts_maximum_fields_per_table,
 181     $rOpts_maximum_line_length,
 182     $rOpts_one_line_block_semicolons,
 183     $rOpts_opening_brace_always_on_right,
 184     $rOpts_outdent_keywords,
 185     $rOpts_outdent_labels,
 186     $rOpts_outdent_long_comments,
 187     $rOpts_outdent_long_quotes,
 188     $rOpts_outdent_static_block_comments,
 189     $rOpts_recombine,
 190     $rOpts_short_concatenation_item_length,
 191     $rOpts_stack_closing_block_brace,
 192     $rOpts_static_block_comments,
 193     $rOpts_sub_alias_list,
 194     $rOpts_tee_block_comments,
 195     $rOpts_tee_pod,
 196     $rOpts_tee_side_comments,
 197     $rOpts_variable_maximum_line_length,
 198     $rOpts_valign,
 199     $rOpts_valign_code,
 200     $rOpts_valign_side_comments,
 201     $rOpts_whitespace_cycle,
 202     $rOpts_extended_line_up_parentheses,
 203
 204     # Static hashes initialized in a BEGIN block
 205     %is_assignment,
 206     %is_if_unless_and_or_last_next_redo_return,
 207     %is_if_elsif_else_unless_while_until_for_foreach,
 208     %is_if_unless_while_until_for_foreach,
 209     %is_last_next_redo_return,
 210     %is_if_unless,
 211     %is_if_elsif,
 212     %is_if_unless_elsif,
 213     %is_if_unless_elsif_else,
 214     %is_elsif_else,
 215     %is_and_or,
 216     %is_chain_operator,
 217     %is_block_without_semicolon,
 218     %ok_to_add_semicolon_for_block_type,
 219     %is_opening_type,
 220     %is_closing_type,
 221     %is_opening_token,
 222     %is_closing_token,
 223     %is_ternary,
 224     %is_equal_or_fat_comma,
 225     %is_counted_type,
 226     %is_opening_sequence_token,
 227     %is_closing_sequence_token,
 228     %is_container_label_type,
 229     %is_die_confess_croak_warn,
 230     %is_my_our_local,
 231
 232     @all_operators,
 233
 234     # Initialized in check_options. These are constants and could
 235     # just as well be initialized in a BEGIN block.
 236     %is_do_follower,
 237     %is_anon_sub_brace_follower,
 238     %is_anon_sub_1_brace_follower,
 239     %is_other_brace_follower,
 240
 241     # Initialized and re-initialized in sub initialize_grep_and_friends;
 242     # These can be modified by grep-alias-list
 243     %is_sort_map_grep,
 244     %is_sort_map_grep_eval,
 245     %is_sort_map_grep_eval_do,
 246     %is_block_with_ci,
 247     %is_keyword_returning_list,
 248     %block_type_map,
 249
 250     # Initialized in sub initialize_whitespace_hashes;
 251     # Some can be modified according to user parameters.
 252     %binary_ws_rules,
 253     %want_left_space,
 254     %want_right_space,
 255
 256     # Configured in sub initialize_bond_strength_hashes
 257     %right_bond_strength,
 258     %left_bond_strength,
 259
 260     # Hashes for -kbb=s and -kba=s
 261     %keep_break_before_type,
 262     %keep_break_after_type,
 263
 264     # Initialized in check_options, modified by prepare_cuddled_block_types:
 265     %want_one_line_block,
 266
 267     # Initialized in sub prepare_cuddled_block_types
 268     $rcuddled_block_types,
 269
 270     # Initialized and configured in check_options
 271     %outdent_keyword,
 272     %keyword_paren_inner_tightness,
 273
 274     %want_break_before,
 275
 276     %break_before_container_types,
 277     %container_indentation_options,
 278
 279     %space_after_keyword,
 280
 281     %tightness,
 282     %matching_token,
 283
 284     %opening_vertical_tightness,
 285     %closing_vertical_tightness,
 286     %closing_token_indentation,
 287     $some_closing_token_indentation,
 288
 289     %opening_token_right,
 290     %stack_opening_token,
 291     %stack_closing_token,
 292
 293     %weld_nested_exclusion_rules,
 294     %line_up_parentheses_control_hash,
 295     $line_up_parentheses_control_is_lxpl,
 296
 297     # regex patterns for text identification.
 298     # Most are initialized in a sub make_**_pattern during configuration.
 299     # Most can be configured by user parameters.
 300     $SUB_PATTERN,
 301     $ASUB_PATTERN,
 302     $static_block_comment_pattern,
 303     $static_side_comment_pattern,
 304     $format_skipping_pattern_begin,
 305     $format_skipping_pattern_end,
 306     $non_indenting_brace_pattern,
 307     $bl_exclusion_pattern,
 308     $bl_pattern,
 309     $bli_exclusion_pattern,
 310     $bli_pattern,
 311     $block_brace_vertical_tightness_pattern,
 312     $blank_lines_after_opening_block_pattern,
 313     $blank_lines_before_closing_block_pattern,
 314     $keyword_group_list_pattern,
 315     $keyword_group_list_comment_pattern,
 316     $closing_side_comment_prefix_pattern,
 317     $closing_side_comment_list_pattern,
 318
 319     # Table to efficiently find indentation and max line length
 320     # from level.
 321     @maximum_line_length_at_level,
 322     @maximum_text_length_at_level,
 323     $stress_level_alpha,
 324     $stress_level_beta,
 325
 326     # Total number of sequence items in a weld, for quick checks
 327     $total_weld_count,
 328
 329     #--------------------------------------------------------
 330     # Section 2: Work arrays for the current batch of tokens.
 331     #--------------------------------------------------------
 332
 333     # These are re-initialized for each batch of code
 334     # in sub initialize_batch_variables.
 335     $max_index_to_go,
 336     @block_type_to_go,
 337     @type_sequence_to_go,
 338     @forced_breakpoint_to_go,
 339     @token_lengths_to_go,
 340     @summed_lengths_to_go,
 341     @levels_to_go,
 342     @leading_spaces_to_go,
 343     @reduced_spaces_to_go,
 344     @mate_index_to_go,
 345     @ci_levels_to_go,
 346     @nesting_depth_to_go,
 347     @nobreak_to_go,
 348     @old_breakpoint_to_go,
 349     @tokens_to_go,
 350     @K_to_go,
 351     @types_to_go,
 352     @inext_to_go,
 353     @iprev_to_go,
 354     @parent_seqno_to_go,
 355
 356     # forced breakpoint variables associated with each batch of code
 357     $forced_breakpoint_count,
 358     $forced_breakpoint_undo_count,
 359     $index_max_forced_break,
 360 );
 361
 362 BEGIN {
 363
 364     # Index names for token variables.
 365     # Do not combine with other BEGIN blocks (c101).
 366     my $i = 0;
 367     use constant {
 368         _CI_LEVEL_          => $i++,
 369         _CUMULATIVE_LENGTH_ => $i++,
 370         _LINE_INDEX_        => $i++,
 371         _KNEXT_SEQ_ITEM_    => $i++,
 372         _LEVEL_             => $i++,
 373         _TOKEN_             => $i++,
 374         _TOKEN_LENGTH_      => $i++,
 375         _TYPE_              => $i++,
 376         _TYPE_SEQUENCE_     => $i++,
 377
 378         # Number of token variables; must be last in list:
 379         _NVARS => $i++,
 380     };
 381 }
 382
 383 BEGIN {
 384
 385     # Index names for $self variables.
 386     # Do not combine with other BEGIN blocks (c101).
 387     my $i = 0;
 388     use constant {
 389         _rlines_                    => $i++,
 390         _rlines_new_                => $i++,
 391         _rLL_                       => $i++,
 392         _Klimit_                    => $i++,
 393         _rdepth_of_opening_seqno_   => $i++,
 394         _rSS_                       => $i++,
 395         _Iss_opening_               => $i++,
 396         _Iss_closing_               => $i++,
 397         _rblock_type_of_seqno_      => $i++,
 398         _ris_asub_block_            => $i++,
 399         _ris_sub_block_             => $i++,
 400         _K_opening_container_       => $i++,
 401         _K_closing_container_       => $i++,
 402         _K_opening_ternary_         => $i++,
 403         _K_closing_ternary_         => $i++,
 404         _K_first_seq_item_          => $i++,
 405         _rK_phantom_semicolons_     => $i++,
 406         _rtype_count_by_seqno_      => $i++,
 407         _ris_function_call_paren_   => $i++,
 408         _rlec_count_by_seqno_       => $i++,
 409         _ris_broken_container_      => $i++,
 410         _ris_permanently_broken_    => $i++,
 411         _rhas_list_                 => $i++,
 412         _rhas_broken_list_          => $i++,
 413         _rhas_broken_list_with_lec_ => $i++,
 414         _rhas_code_block_           => $i++,
 415         _rhas_broken_code_block_    => $i++,
 416         _rhas_ternary_              => $i++,
 417         _ris_excluded_lp_container_ => $i++,
 418         _rlp_object_by_seqno_       => $i++,
 419         _rwant_reduced_ci_          => $i++,
 420         _rno_xci_by_seqno_          => $i++,
 421         _rbrace_left_               => $i++,
 422         _ris_bli_container_         => $i++,
 423         _rparent_of_seqno_          => $i++,
 424         _rchildren_of_seqno_        => $i++,
 425         _ris_list_by_seqno_         => $i++,
 426         _rbreak_container_          => $i++,
 427         _rshort_nested_             => $i++,
 428         _length_function_           => $i++,
 429         _is_encoded_data_           => $i++,
 430         _fh_tee_                    => $i++,
 431         _sink_object_               => $i++,
 432         _file_writer_object_        => $i++,
 433         _vertical_aligner_object_   => $i++,
 434         _logger_object_             => $i++,
 435         _radjusted_levels_          => $i++,
 436         _this_batch_                => $i++,
 437
 438         _last_output_short_opening_token_ => $i++,
 439
 440         _last_line_leading_type_       => $i++,
 441         _last_line_leading_level_      => $i++,
 442         _last_last_line_leading_level_ => $i++,
 443
 444         _added_semicolon_count_    => $i++,
 445         _first_added_semicolon_at_ => $i++,
 446         _last_added_semicolon_at_  => $i++,
 447
 448         _deleted_semicolon_count_    => $i++,
 449         _first_deleted_semicolon_at_ => $i++,
 450         _last_deleted_semicolon_at_  => $i++,
 451
 452         _embedded_tab_count_    => $i++,
 453         _first_embedded_tab_at_ => $i++,
 454         _last_embedded_tab_at_  => $i++,
 455
 456         _first_tabbing_disagreement_       => $i++,
 457         _last_tabbing_disagreement_        => $i++,
 458         _tabbing_disagreement_count_       => $i++,
 459         _in_tabbing_disagreement_          => $i++,
 460         _first_brace_tabbing_disagreement_ => $i++,
 461         _in_brace_tabbing_disagreement_    => $i++,
 462
 463         _saw_VERSION_in_this_file_ => $i++,
 464         _saw_END_or_DATA_          => $i++,
 465
 466         _rK_weld_left_         => $i++,
 467         _rK_weld_right_        => $i++,
 468         _rweld_len_right_at_K_ => $i++,
 469
 470         _rspecial_side_comment_type_ => $i++,
 471
 472         _rseqno_controlling_my_ci_    => $i++,
 473         _ris_seqno_controlling_ci_    => $i++,
 474         _save_logfile_                => $i++,
 475         _maximum_level_               => $i++,
 476         _maximum_level_at_line_       => $i++,
 477         _maximum_BLOCK_level_         => $i++,
 478         _maximum_BLOCK_level_at_line_ => $i++,
 479
 480         _rKrange_code_without_comments_ => $i++,
 481         _rbreak_before_Kfirst_          => $i++,
 482         _rbreak_after_Klast_            => $i++,
 483         _rwant_container_open_          => $i++,
 484         _converged_                     => $i++,
 485
 486         _rstarting_multiline_qw_seqno_by_K_ => $i++,
 487         _rending_multiline_qw_seqno_by_K_   => $i++,
 488         _rKrange_multiline_qw_by_seqno_     => $i++,
 489         _rmultiline_qw_has_extra_level_     => $i++,
 490
 491         _rcollapsed_length_by_seqno_       => $i++,
 492         _rbreak_before_container_by_seqno_ => $i++,
 493         _ris_essential_old_breakpoint_     => $i++,
 494         _roverride_cab3_                   => $i++,
 495         _ris_assigned_structure_           => $i++,
 496
 497         _rseqno_non_indenting_brace_by_ix_    => $i++,
 498         _rreduce_vertical_tightness_by_seqno_ => $i++,
 499
 500         _LAST_SELF_INDEX_ => $i - 1,
 501     };
 502 }
 503
 504 BEGIN {
 505
 506     # Index names for batch variables.
 507     # Do not combine with other BEGIN blocks (c101).
 508     # These are stored in _this_batch_, which is a sub-array of $self.
 509     my $i = 0;
 510     use constant {
 511         _starting_in_quote_          => $i++,
 512         _ending_in_quote_            => $i++,
 513         _is_static_block_comment_    => $i++,
 514         _ri_first_                   => $i++,
 515         _ri_last_                    => $i++,
 516         _do_not_pad_                 => $i++,
 517         _peak_batch_size_            => $i++,
 518         _batch_count_                => $i++,
 519         _rix_seqno_controlling_ci_   => $i++,
 520         _batch_CODE_type_            => $i++,
 521         _ri_starting_one_line_block_ => $i++,
 522     };
 523 }
 524
 525 BEGIN {
 526
 527     # Sequence number assigned to the root of sequence tree.
 528     # The minimum of the actual sequences numbers is 4, so we can use 1
 529     use constant SEQ_ROOT => 1;
 530
 531     # Codes for insertion and deletion of blanks
 532     use constant DELETE => 0;
 533     use constant STABLE => 1;
 534     use constant INSERT => 2;
 535
 536     # whitespace codes
 537     use constant WS_YES      => 1;
 538     use constant WS_OPTIONAL => 0;
 539     use constant WS_NO       => -1;
 540
 541     # Token bond strengths.
 542     use constant NO_BREAK    => 10_000;
 543     use constant VERY_STRONG => 100;
 544     use constant STRONG      => 2.1;
 545     use constant NOMINAL     => 1.1;
 546     use constant WEAK        => 0.8;
 547     use constant VERY_WEAK   => 0.55;
 548
 549     # values for testing indexes in output array
 550     use constant UNDEFINED_INDEX => -1;
 551
 552     # Maximum number of little messages; probably need not be changed.
 553     use constant MAX_NAG_MESSAGES => 6;
 554
 555     # This is the decimal range of printable characters in ASCII.  It is used to
 556     # make quick preliminary checks before resorting to using a regex.
 557     use constant ORD_PRINTABLE_MIN => 33;
 558     use constant ORD_PRINTABLE_MAX => 126;
 559
 560     # Initialize constant hashes ...
 561     my @q;
 562
 563     @q = qw(
 564       = **= += *= &= <<= &&=
 565       -= /= |= >>= ||= //=
 566       .= %= ^=
 567       x=
 568     );
 569     @is_assignment{@q} = (1) x scalar(@q);
 570
 571     @q = qw(is if unless and or err last next redo return);
 572     @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
 573
 574     # These block types may have text between the keyword and opening
 575     # curly.  Note: 'else' does not, but must be included to allow trailing
 576     # if/elsif text to be appended.
 577     # patch for SWITCH/CASE: added 'case' and 'when'
 578     @q = qw(if elsif else unless while until for foreach case when catch);
 579     @is_if_elsif_else_unless_while_until_for_foreach{@q} =
 580       (1) x scalar(@q);
 581
 582     @q = qw(if unless while until for foreach);
 583     @is_if_unless_while_until_for_foreach{@q} =
 584       (1) x scalar(@q);
 585
 586     @q = qw(last next redo return);
 587     @is_last_next_redo_return{@q} = (1) x scalar(@q);
 588
 589     # Map related block names into a common name to allow vertical alignment
 590     # used by sub make_alignment_patterns. Note: this is normally unchanged,
 591     # but it contains 'grep' and can be re-initialized in
 592     # sub initialize_grep_and_friends in a testing mode.
 593     %block_type_map = (
 594         'unless'  => 'if',
 595         'else'    => 'if',
 596         'elsif'   => 'if',
 597         'when'    => 'if',
 598         'default' => 'if',
 599         'case'    => 'if',
 600         'sort'    => 'map',
 601         'grep'    => 'map',
 602     );
 603
 604     @q = qw(if unless);
 605     @is_if_unless{@q} = (1) x scalar(@q);
 606
 607     @q = qw(if elsif);
 608     @is_if_elsif{@q} = (1) x scalar(@q);
 609
 610     @q = qw(if unless elsif);
 611     @is_if_unless_elsif{@q} = (1) x scalar(@q);
 612
 613     @q = qw(if unless elsif else);
 614     @is_if_unless_elsif_else{@q} = (1) x scalar(@q);
 615
 616     @q = qw(elsif else);
 617     @is_elsif_else{@q} = (1) x scalar(@q);
 618
 619     @q = qw(and or err);
 620     @is_and_or{@q} = (1) x scalar(@q);
 621
 622     # Identify certain operators which often occur in chains.
 623     # Note: the minus (-) causes a side effect of padding of the first line in
 624     # something like this (by sub set_logical_padding):
 625     #    Checkbutton => 'Transmission checked',
 626     #   -variable    => \$TRANS
 627     # This usually improves appearance so it seems ok.
 628     @q = qw(&& || and or : ? . + - * /);
 629     @is_chain_operator{@q} = (1) x scalar(@q);
 630
 631     # Operators that the user can request break before or after.
 632     # Note that some are keywords
 633     @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
 634       = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
 635       . : ? && || and or err xor
 636     );
 637
 638     # We can remove semicolons after blocks preceded by these keywords
 639     @q =
 640       qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
 641       unless while until for foreach given when default);
 642     @is_block_without_semicolon{@q} = (1) x scalar(@q);
 643
 644     # We will allow semicolons to be added within these block types
 645     # as well as sub and package blocks.
 646     # NOTES:
 647     # 1. Note that these keywords are omitted:
 648     #     switch case given when default sort map grep
 649     # 2. It is also ok to add for sub and package blocks and a labeled block
 650     # 3. But not okay for other perltidy types including:
 651     #     { } ; G t
 652     # 4. Test files: blktype.t, blktype1.t, semicolon.t
 653     @q =
 654       qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
 655       unless do while until eval for foreach );
 656     @ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q);
 657
 658     # 'L' is token for opening { at hash key
 659     @q = qw< L { ( [ >;
 660     @is_opening_type{@q} = (1) x scalar(@q);
 661
 662     # 'R' is token for closing } at hash key
 663     @q = qw< R } ) ] >;
 664     @is_closing_type{@q} = (1) x scalar(@q);
 665
 666     @q = qw< { ( [ >;
 667     @is_opening_token{@q} = (1) x scalar(@q);
 668
 669     @q = qw< } ) ] >;
 670     @is_closing_token{@q} = (1) x scalar(@q);
 671
 672     @q = qw( ? : );
 673     @is_ternary{@q} = (1) x scalar(@q);
 674
 675     @q = qw< { ( [ ? >;
 676     @is_opening_sequence_token{@q} = (1) x scalar(@q);
 677
 678     @q = qw< } ) ] : >;
 679     @is_closing_sequence_token{@q} = (1) x scalar(@q);
 680
 681     # a hash needed by sub break_lists for labeling containers
 682     @q = qw( k => && || ? : . );
 683     @is_container_label_type{@q} = (1) x scalar(@q);
 684
 685     @q = qw( die confess croak warn );
 686     @is_die_confess_croak_warn{@q} = (1) x scalar(@q);
 687
 688     @q = qw( my our local );
 689     @is_my_our_local{@q} = (1) x scalar(@q);
 690
 691     # Braces -bbht etc must follow these. Note: experimentation with
 692     # including a simple comma shows that it adds little and can lead
 693     # to poor formatting in complex lists.
 694     @q = qw( = => );
 695     @is_equal_or_fat_comma{@q} = (1) x scalar(@q);
 696
 697     @q = qw( => ; h f );
 698     push @q, ',';
 699     @is_counted_type{@q} = (1) x scalar(@q);
 700
 701 }
 702
 703 {    ## begin closure to count instances
 704
 705     # methods to count instances
 706     my $_count = 0;
 707     sub get_count        { return $_count; }
 708     sub _increment_count { return ++$_count }
 709     sub _decrement_count { return --$_count }
 710 } ## end closure to count instances
 711
 712 sub new {
 713
 714     my ( $class, @args ) = @_;
 715
 716     # we are given an object with a write_line() method to take lines
 717     my %defaults = (
 718         sink_object        => undef,
 719         diagnostics_object => undef,
 720         logger_object      => undef,
 721         length_function    => sub { return length( $_[0] ) },
 722         is_encoded_data    => EMPTY_STRING,
 723         fh_tee             => undef,
 724     );
 725     my %args = ( %defaults, @args );
 726
 727     my $length_function    = $args{length_function};
 728     my $is_encoded_data    = $args{is_encoded_data};
 729     my $fh_tee             = $args{fh_tee};
 730     my $logger_object      = $args{logger_object};
 731     my $diagnostics_object = $args{diagnostics_object};
 732
 733     # we create another object with a get_line() and peek_ahead() method
 734     my $sink_object = $args{sink_object};
 735     my $file_writer_object =
 736       Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
 737
 738     # initialize closure variables...
 739     set_logger_object($logger_object);
 740     set_diagnostics_object($diagnostics_object);
 741     initialize_lp_vars();
 742     initialize_csc_vars();
 743     initialize_break_lists();
 744     initialize_undo_ci();
 745     initialize_process_line_of_CODE();
 746     initialize_grind_batch_of_CODE();
 747     initialize_final_indentation_adjustment();
 748     initialize_postponed_breakpoint();
 749     initialize_batch_variables();
 750     initialize_write_line();
 751
 752     my $vertical_aligner_object = Perl::Tidy::VerticalAligner->new(
 753         rOpts              => $rOpts,
 754         file_writer_object => $file_writer_object,
 755         logger_object      => $logger_object,
 756         diagnostics_object => $diagnostics_object,
 757         length_function    => $length_function
 758     );
 759
 760     write_logfile_entry("\nStarting tokenization pass...\n");
 761
 762     if ( $rOpts->{'entab-leading-whitespace'} ) {
 763         write_logfile_entry(
 764 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
 765         );
 766     }
 767     elsif ( $rOpts->{'tabs'} ) {
 768         write_logfile_entry("Indentation will be with a tab character\n");
 769     }
 770     else {
 771         write_logfile_entry(
 772             "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
 773     }
 774
 775     # Initialize the $self array reference.
 776     # To add an item, first add a constant index in the BEGIN block above.
 777     my $self = [];
 778
 779     # Basic data structures...
 780     $self->[_rlines_]     = [];    # = ref to array of lines of the file
 781     $self->[_rlines_new_] = [];    # = ref to array of output lines
 782
 783     # 'rLL' = reference to the continuous liner array of all tokens in a file.
 784     # 'LL' stands for 'Linked List'. Using a linked list was a disaster, but
 785     # 'LL' stuck because it is easy to type.  The 'rLL' array is updated
 786     # by sub 'respace_tokens' during reformatting.  The indexes in 'rLL' begin
 787     # with '$K' by convention.
 788     $self->[_rLL_]    = [];
 789     $self->[_Klimit_] = undef;    # = maximum K index for rLL.
 790
 791     # Indexes into the rLL list
 792     $self->[_K_opening_container_] = {};
 793     $self->[_K_closing_container_] = {};
 794     $self->[_K_opening_ternary_]   = {};
 795     $self->[_K_closing_ternary_]   = {};
 796     $self->[_K_first_seq_item_]    = undef; # K of first token with a sequence #
 797
 798     # Array of phantom semicolons, in case we ever need to undo them
 799     $self->[_rK_phantom_semicolons_] = undef;
 800
 801     # 'rSS' is the 'Signed Sequence' list, a continuous list of all sequence
 802     # numbers with + or - indicating opening or closing. This list represents
 803     # the entire container tree and is invariant under reformatting.  It can be
 804     # used to quickly travel through the tree.  Indexes in the rSS array begin
 805     # with '$I' by convention.  The 'Iss' arrays give the indexes in this list
 806     # of opening and closing sequence numbers.
 807     $self->[_rSS_]         = [];
 808     $self->[_Iss_opening_] = [];
 809     $self->[_Iss_closing_] = [];
 810
 811     # Arrays to help traverse the tree
 812     $self->[_rdepth_of_opening_seqno_] = [];
 813     $self->[_rblock_type_of_seqno_]    = {};
 814     $self->[_ris_asub_block_]          = {};
 815     $self->[_ris_sub_block_]           = {};
 816
 817     # Mostly list characteristics and processing flags
 818     $self->[_rtype_count_by_seqno_]      = {};
 819     $self->[_ris_function_call_paren_]   = {};
 820     $self->[_rlec_count_by_seqno_]       = {};
 821     $self->[_ris_broken_container_]      = {};
 822     $self->[_ris_permanently_broken_]    = {};
 823     $self->[_rhas_list_]                 = {};
 824     $self->[_rhas_broken_list_]          = {};
 825     $self->[_rhas_broken_list_with_lec_] = {};
 826     $self->[_rhas_code_block_]           = {};
 827     $self->[_rhas_broken_code_block_]    = {};
 828     $self->[_rhas_ternary_]              = {};
 829     $self->[_ris_excluded_lp_container_] = {};
 830     $self->[_rlp_object_by_seqno_]       = {};
 831     $self->[_rwant_reduced_ci_]          = {};
 832     $self->[_rno_xci_by_seqno_]          = {};
 833     $self->[_rbrace_left_]               = {};
 834     $self->[_ris_bli_container_]         = {};
 835     $self->[_rparent_of_seqno_]          = {};
 836     $self->[_rchildren_of_seqno_]        = {};
 837     $self->[_ris_list_by_seqno_]         = {};
 838
 839     $self->[_rbreak_container_] = {};                 # prevent one-line blocks
 840     $self->[_rshort_nested_]    = {};                 # blocks not forced open
 841     $self->[_length_function_]  = $length_function;
 842     $self->[_is_encoded_data_]  = $is_encoded_data;
 843
 844     # Some objects...
 845     $self->[_fh_tee_]                  = $fh_tee;
 846     $self->[_sink_object_]             = $sink_object;
 847     $self->[_file_writer_object_]      = $file_writer_object;
 848     $self->[_vertical_aligner_object_] = $vertical_aligner_object;
 849     $self->[_logger_object_]           = $logger_object;
 850
 851     # Reference to the batch being processed
 852     $self->[_this_batch_] = [];
 853
 854     # Memory of processed text...
 855     $self->[_last_last_line_leading_level_]    = 0;
 856     $self->[_last_line_leading_level_]         = 0;
 857     $self->[_last_line_leading_type_]          = '#';
 858     $self->[_last_output_short_opening_token_] = 0;
 859     $self->[_added_semicolon_count_]           = 0;
 860     $self->[_first_added_semicolon_at_]        = 0;
 861     $self->[_last_added_semicolon_at_]         = 0;
 862     $self->[_deleted_semicolon_count_]         = 0;
 863     $self->[_first_deleted_semicolon_at_]      = 0;
 864     $self->[_last_deleted_semicolon_at_]       = 0;
 865     $self->[_embedded_tab_count_]              = 0;
 866     $self->[_first_embedded_tab_at_]           = 0;
 867     $self->[_last_embedded_tab_at_]            = 0;
 868     $self->[_first_tabbing_disagreement_]      = 0;
 869     $self->[_last_tabbing_disagreement_]       = 0;
 870     $self->[_tabbing_disagreement_count_]      = 0;
 871     $self->[_in_tabbing_disagreement_]         = 0;
 872     $self->[_saw_VERSION_in_this_file_]        = !$rOpts->{'pass-version-line'};
 873     $self->[_saw_END_or_DATA_]                 = 0;
 874     $self->[_first_brace_tabbing_disagreement_] = undef;
 875     $self->[_in_brace_tabbing_disagreement_]    = undef;
 876
 877     # Hashes related to container welding...
 878     $self->[_radjusted_levels_] = [];
 879
 880     # Weld data structures
 881     $self->[_rK_weld_left_]         = {};
 882     $self->[_rK_weld_right_]        = {};
 883     $self->[_rweld_len_right_at_K_] = {};
 884
 885     # -xci stuff
 886     $self->[_rseqno_controlling_my_ci_] = {};
 887     $self->[_ris_seqno_controlling_ci_] = {};
 888
 889     $self->[_rspecial_side_comment_type_]  = {};
 890     $self->[_maximum_level_]               = 0;
 891     $self->[_maximum_level_at_line_]       = 0;
 892     $self->[_maximum_BLOCK_level_]         = 0;
 893     $self->[_maximum_BLOCK_level_at_line_] = 0;
 894
 895     $self->[_rKrange_code_without_comments_] = [];
 896     $self->[_rbreak_before_Kfirst_]          = {};
 897     $self->[_rbreak_after_Klast_]            = {};
 898     $self->[_rwant_container_open_]          = {};
 899     $self->[_converged_]                     = 0;
 900
 901     # qw stuff
 902     $self->[_rstarting_multiline_qw_seqno_by_K_] = {};
 903     $self->[_rending_multiline_qw_seqno_by_K_]   = {};
 904     $self->[_rKrange_multiline_qw_by_seqno_]     = {};
 905     $self->[_rmultiline_qw_has_extra_level_]     = {};
 906
 907     $self->[_rcollapsed_length_by_seqno_]       = {};
 908     $self->[_rbreak_before_container_by_seqno_] = {};
 909     $self->[_ris_essential_old_breakpoint_]     = {};
 910     $self->[_roverride_cab3_]                   = {};
 911     $self->[_ris_assigned_structure_]           = {};
 912
 913     $self->[_rseqno_non_indenting_brace_by_ix_]    = {};
 914     $self->[_rreduce_vertical_tightness_by_seqno_] = {};
 915
 916     # This flag will be updated later by a call to get_save_logfile()
 917     $self->[_save_logfile_] = defined($logger_object);
 918
 919     # Be sure all variables in $self have been initialized above.  To find the
 920     # correspondence of index numbers and array names, copy a list to a file
 921     # and use the unix 'nl' command to number lines 1..
 922     if (DEVEL_MODE) {
 923         my @non_existant;
 924         foreach ( 0 .. _LAST_SELF_INDEX_ ) {
 925             if ( !exists( $self->[$_] ) ) {
 926                 push @non_existant, $_;
 927             }
 928         }
 929         if (@non_existant) {
 930             Fault("These indexes in self not initialized: (@non_existant)\n");
 931         }
 932     }
 933
 934     bless $self, $class;
 935
 936     # Safety check..this is not a class yet
 937     if ( _increment_count() > 1 ) {
 938         confess
 939 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
 940     }
 941     return $self;
 942 } ## end sub new
 943
 944 ######################################
 945 # CODE SECTION 2: Some Basic Utilities
 946 ######################################
 947
 948 sub check_rLL {
 949
 950     # Verify that the rLL array has not been auto-vivified
 951     my ( $self, $msg ) = @_;
 952     my $rLL    = $self->[_rLL_];
 953     my $Klimit = $self->[_Klimit_];
 954     my $num    = @{$rLL};
 955     if (   ( defined($Klimit) && $Klimit != $num - 1 )
 956         || ( !defined($Klimit) && $num > 0 ) )
 957     {
 958
 959         # This fault can occur if the array has been accessed for an index
 960         # greater than $Klimit, which is the last token index.  Just accessing
 961         # the array above index $Klimit, not setting a value, can cause @rLL to
 962         # increase beyond $Klimit.  If this occurs, the problem can be located
 963         # by making calls to this routine at different locations in
 964         # sub 'finish_formatting'.
 965         $Klimit = 'undef' if ( !defined($Klimit) );
 966         $msg    = EMPTY_STRING unless $msg;
 967         Fault("$msg ERROR: rLL has num=$num but Klimit='$Klimit'\n");
 968     }
 969     return;
 970 } ## end sub check_rLL
 971
 972 sub check_keys {
 973     my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
 974
 975     # Check the keys of a hash:
 976     # $rtest   = ref to hash to test
 977     # $rvalid  = ref to hash with valid keys
 978
 979     # $msg = a message to write in case of error
 980     # $exact_match defines the type of check:
 981     #     = false: test hash must not have unknown key
 982     #     = true:  test hash must have exactly same keys as known hash
 983     my @unknown_keys =
 984       grep { !exists $rvalid->{$_} } keys %{$rtest};
 985     my @missing_keys =
 986       grep { !exists $rtest->{$_} } keys %{$rvalid};
 987     my $error = @unknown_keys;
 988     if ($exact_match) { $error ||= @missing_keys }
 989     if ($error) {
 990         local $LIST_SEPARATOR = ')(';
 991         my @expected_keys = sort keys %{$rvalid};
 992         @unknown_keys = sort @unknown_keys;
 993         Fault(<<EOM);
 994 ------------------------------------------------------------------------
 995 Program error detected checking hash keys
 996 Message is: '$msg'
 997 Expected keys: (@expected_keys)
 998 Unknown key(s): (@unknown_keys)
 999 Missing key(s): (@missing_keys)
1000 ------------------------------------------------------------------------
1001 EOM
1002     }
1003     return;
1004 } ## end sub check_keys
1005
1006 sub check_token_array {
1007     my $self = shift;
1008
1009     # Check for errors in the array of tokens. This is only called
1010     # when the DEVEL_MODE flag is set, so this Fault will only occur
1011     # during code development.
1012     my $rLL = $self->[_rLL_];
1013     foreach my $KK ( 0 .. @{$rLL} - 1 ) {
1014         my $nvars = @{ $rLL->[$KK] };
1015         if ( $nvars != _NVARS ) {
1016             my $NVARS = _NVARS;
1017             my $type  = $rLL->[$KK]->[_TYPE_];
1018             $type = '*' unless defined($type);
1019
1020             # The number of variables per token node is _NVARS and was set when
1021             # the array indexes were generated. So if the number of variables
1022             # is different we have done something wrong, like not store all of
1023             # them in sub 'write_line' when they were received from the
1024             # tokenizer.
1025             Fault(
1026 "number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
1027             );
1028         }
1029         foreach my $var ( _TOKEN_, _TYPE_ ) {
1030             if ( !defined( $rLL->[$KK]->[$var] ) ) {
1031                 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
1032
1033                 # This is a simple check that each token has some basic
1034                 # variables.  In other words, that there are no holes in the
1035                 # array of tokens.  Sub 'write_line' pushes tokens into the
1036                 # $rLL array, so this should guarantee no gaps.
1037                 Fault("Undefined variable $var for K=$KK, line=$iline\n");
1038             }
1039         }
1040     }
1041     return;
1042 } ## end sub check_token_array
1043
1044 {    ## begin closure check_line_hashes
1045
1046     # This code checks that no autovivification occurs in the 'line' hash
1047
1048     my %valid_line_hash;
1049
1050     BEGIN {
1051
1052         # These keys are defined for each line in the formatter
1053         # Each line must have exactly these quantities
1054         my @valid_line_keys = qw(
1055           _curly_brace_depth
1056           _ending_in_quote
1057           _guessed_indentation_level
1058           _line_number
1059           _line_text
1060           _line_type
1061           _paren_depth
1062           _quote_character
1063           _rK_range
1064           _square_bracket_depth
1065           _starting_in_quote
1066           _ended_in_blank_token
1067           _code_type
1068
1069           _ci_level_0
1070           _level_0
1071           _nesting_blocks_0
1072           _nesting_tokens_0
1073         );
1074
1075         @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
1076     }
1077
1078     sub check_line_hashes {
1079         my $self   = shift;
1080         my $rlines = $self->[_rlines_];
1081         foreach my $rline ( @{$rlines} ) {
1082             my $iline     = $rline->{_line_number};
1083             my $line_type = $rline->{_line_type};
1084             check_keys( $rline, \%valid_line_hash,
1085                 "Checkpoint: line number =$iline,  line_type=$line_type", 1 );
1086         }
1087         return;
1088     } ## end sub check_line_hashes
1089 } ## end closure check_line_hashes
1090
1091 {    ## begin closure for logger routines
1092     my $logger_object;
1093
1094     # Called once per file to initialize the logger object
1095     sub set_logger_object {
1096         $logger_object = shift;
1097         return;
1098     }
1099
1100     sub get_logger_object {
1101         return $logger_object;
1102     }
1103
1104     sub get_input_stream_name {
1105         my $input_stream_name = EMPTY_STRING;
1106         if ($logger_object) {
1107             $input_stream_name = $logger_object->get_input_stream_name();
1108         }
1109         return $input_stream_name;
1110     }
1111
1112     # interface to Perl::Tidy::Logger routines
1113     sub warning {
1114         my ($msg) = @_;
1115         if ($logger_object) { $logger_object->warning($msg); }
1116         return;
1117     }
1118
1119     sub complain {
1120         my ($msg) = @_;
1121         if ($logger_object) {
1122             $logger_object->complain($msg);
1123         }
1124         return;
1125     }
1126
1127     sub write_logfile_entry {
1128         my @msg = @_;
1129         if ($logger_object) {
1130             $logger_object->write_logfile_entry(@msg);
1131         }
1132         return;
1133     }
1134
1135     sub get_saw_brace_error {
1136         if ($logger_object) {
1137             return $logger_object->get_saw_brace_error();
1138         }
1139         return;
1140     }
1141
1142     sub we_are_at_the_last_line {
1143         if ($logger_object) {
1144             $logger_object->we_are_at_the_last_line();
1145         }
1146         return;
1147     }
1148
1149 } ## end closure for logger routines
1150
1151 {    ## begin closure for diagnostics routines
1152     my $diagnostics_object;
1153
1154     # Called once per file to initialize the diagnostics object
1155     sub set_diagnostics_object {
1156         $diagnostics_object = shift;
1157         return;
1158     }
1159
1160     sub write_diagnostics {
1161         my ($msg) = @_;
1162         if ($diagnostics_object) {
1163             $diagnostics_object->write_diagnostics($msg);
1164         }
1165         return;
1166     }
1167 } ## end closure for diagnostics routines
1168
1169 sub get_convergence_check {
1170     my ($self) = @_;
1171     return $self->[_converged_];
1172 }
1173
1174 sub get_added_semicolon_count {
1175     my $self = shift;
1176     return $self->[_added_semicolon_count_];
1177 }
1178
1179 sub get_output_line_number {
1180     my ($self) = @_;
1181     my $vao = $self->[_vertical_aligner_object_];
1182     return $vao->get_output_line_number();
1183 }
1184
1185 sub want_blank_line {
1186     my $self = shift;
1187     $self->flush();
1188     my $file_writer_object = $self->[_file_writer_object_];
1189     $file_writer_object->want_blank_line();
1190     return;
1191 }
1192
1193 sub write_unindented_line {
1194     my ( $self, $line ) = @_;
1195     $self->flush();
1196     my $file_writer_object = $self->[_file_writer_object_];
1197     $file_writer_object->write_line($line);
1198     return;
1199 }
1200
1201 sub consecutive_nonblank_lines {
1202     my ($self)             = @_;
1203     my $file_writer_object = $self->[_file_writer_object_];
1204     my $vao                = $self->[_vertical_aligner_object_];
1205     return $file_writer_object->get_consecutive_nonblank_lines() +
1206       $vao->get_cached_line_count();
1207 }
1208
1209 sub max {
1210     my (@vals) = @_;
1211     my $max = shift @vals;
1212     for (@vals) { $max = $_ > $max ? $_ : $max }
1213     return $max;
1214 }
1215
1216 sub min {
1217     my (@vals) = @_;
1218     my $min = shift @vals;
1219     for (@vals) { $min = $_ < $min ? $_ : $min }
1220     return $min;
1221 }
1222
1223 sub split_words {
1224
1225     # given a string containing words separated by whitespace,
1226     # return the list of words
1227     my ($str) = @_;
1228     return unless $str;
1229     $str =~ s/\s+$//;
1230     $str =~ s/^\s+//;
1231     return split( /\s+/, $str );
1232 } ## end sub split_words
1233
1234 ###########################################
1235 # CODE SECTION 3: Check and process options
1236 ###########################################
1237
1238 sub check_options {
1239
1240     # This routine is called to check the user-supplied run parameters
1241     # and to configure the control hashes to them.
1242     $rOpts = shift;
1243
1244     initialize_whitespace_hashes();
1245     initialize_bond_strength_hashes();
1246
1247     # This function must be called early to get hashes with grep initialized
1248     initialize_grep_and_friends( $rOpts->{'grep-alias-list'} );
1249
1250     # Make needed regex patterns for matching text.
1251     # NOTE: sub_matching_patterns must be made first because later patterns use
1252     # them; see RT #133130.
1253     make_sub_matching_pattern();
1254     make_static_block_comment_pattern();
1255     make_static_side_comment_pattern();
1256     make_closing_side_comment_prefix();
1257     make_closing_side_comment_list_pattern();
1258     $format_skipping_pattern_begin =
1259       make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
1260     $format_skipping_pattern_end =
1261       make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
1262     make_non_indenting_brace_pattern();
1263
1264     # If closing side comments ARE selected, then we can safely
1265     # delete old closing side comments unless closing side comment
1266     # warnings are requested.  This is a good idea because it will
1267     # eliminate any old csc's which fall below the line count threshold.
1268     # We cannot do this if warnings are turned on, though, because we
1269     # might delete some text which has been added.  So that must
1270     # be handled when comments are created.  And we cannot do this
1271     # with -io because -csc will be skipped altogether.
1272     if ( $rOpts->{'closing-side-comments'} ) {
1273         if (   !$rOpts->{'closing-side-comment-warnings'}
1274             && !$rOpts->{'indent-only'} )
1275         {
1276             $rOpts->{'delete-closing-side-comments'} = 1;
1277         }
1278     }
1279
1280     # If closing side comments ARE NOT selected, but warnings ARE
1281     # selected and we ARE DELETING csc's, then we will pretend to be
1282     # adding with a huge interval.  This will force the comments to be
1283     # generated for comparison with the old comments, but not added.
1284     elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
1285         if ( $rOpts->{'delete-closing-side-comments'} ) {
1286             $rOpts->{'delete-closing-side-comments'}  = 0;
1287             $rOpts->{'closing-side-comments'}         = 1;
1288             $rOpts->{'closing-side-comment-interval'} = 100_000_000;
1289         }
1290     }
1291
1292     make_bli_pattern();
1293     make_bl_pattern();
1294     make_block_brace_vertical_tightness_pattern();
1295     make_blank_line_pattern();
1296     make_keyword_group_list_pattern();
1297
1298     # Make initial list of desired one line block types
1299     # They will be modified by 'prepare_cuddled_block_types'
1300     # NOTE: this line must come after is_sort_map_grep_eval is
1301     # initialized in sub 'initialize_grep_and_friends'
1302     %want_one_line_block = %is_sort_map_grep_eval;
1303
1304     prepare_cuddled_block_types();
1305     if ( $rOpts->{'dump-cuddled-block-list'} ) {
1306         dump_cuddled_block_list(*STDOUT);
1307         Exit(0);
1308     }
1309
1310     # -xlp implies -lp
1311     if ( $rOpts->{'extended-line-up-parentheses'} ) {
1312         $rOpts->{'line-up-parentheses'} ||= 1;
1313     }
1314
1315     if ( $rOpts->{'line-up-parentheses'} ) {
1316
1317         if (   $rOpts->{'indent-only'}
1318             || !$rOpts->{'add-newlines'}
1319             || !$rOpts->{'delete-old-newlines'} )
1320         {
1321             Warn(<<EOM);
1322 -----------------------------------------------------------------------
1323 Conflict: -lp  conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
1324
1325 The -lp indentation logic requires that perltidy be able to coordinate
1326 arbitrarily large numbers of line breakpoints.  This isn't possible
1327 with these flags.
1328 -----------------------------------------------------------------------
1329 EOM
1330             $rOpts->{'line-up-parentheses'}          = 0;
1331             $rOpts->{'extended-line-up-parentheses'} = 0;
1332         }
1333
1334         if ( $rOpts->{'whitespace-cycle'} ) {
1335             Warn(<<EOM);
1336 Conflict: -wc cannot currently be used with the -lp option; ignoring -wc
1337 EOM
1338             $rOpts->{'whitespace-cycle'} = 0;
1339         }
1340     }
1341
1342     # At present, tabs are not compatible with the line-up-parentheses style
1343     # (it would be possible to entab the total leading whitespace
1344     # just prior to writing the line, if desired).
1345     if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
1346         Warn(<<EOM);
1347 Conflict: -t (tabs) cannot be used with the -lp  option; ignoring -t; see -et.
1348 EOM
1349         $rOpts->{'tabs'} = 0;
1350     }
1351
1352     # Likewise, tabs are not compatible with outdenting..
1353     if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
1354         Warn(<<EOM);
1355 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
1356 EOM
1357         $rOpts->{'tabs'} = 0;
1358     }
1359
1360     if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
1361         Warn(<<EOM);
1362 Conflict: -t (tabs) cannot be used with the -ola  option; ignoring -t; see -et.
1363 EOM
1364         $rOpts->{'tabs'} = 0;
1365     }
1366
1367     if ( !$rOpts->{'space-for-semicolon'} ) {
1368         $want_left_space{'f'} = -1;
1369     }
1370
1371     if ( $rOpts->{'space-terminal-semicolon'} ) {
1372         $want_left_space{';'} = 1;
1373     }
1374
1375     # We should put an upper bound on any -sil=n value. Otherwise enormous
1376     # files could be created by mistake.
1377     for ( $rOpts->{'starting-indentation-level'} ) {
1378         if ( $_ && $_ > 100 ) {
1379             Warn(<<EOM);
1380 The value --starting-indentation-level=$_ is very large; a mistake? resetting to 0;
1381 EOM
1382             $_ = 0;
1383         }
1384     }
1385
1386     # Require -msp > 0 to avoid future parsing problems (issue c147)
1387     for ( $rOpts->{'minimum-space-to-comment'} ) {
1388         if ( !$_ || $_ <= 0 ) { $_ = 1 }
1389     }
1390
1391     # implement outdenting preferences for keywords
1392     %outdent_keyword = ();
1393     my @okw = split_words( $rOpts->{'outdent-keyword-list'} );
1394     unless (@okw) {
1395         @okw = qw(next last redo goto return);    # defaults
1396     }
1397
1398     # FUTURE: if not a keyword, assume that it is an identifier
1399     foreach (@okw) {
1400         if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
1401             $outdent_keyword{$_} = 1;
1402         }
1403         else {
1404             Warn("ignoring '$_' in -okwl list; not a perl keyword");
1405         }
1406     }
1407
1408     # setup hash for -kpit option
1409     %keyword_paren_inner_tightness = ();
1410     my $kpit_value = $rOpts->{'keyword-paren-inner-tightness'};
1411     if ( defined($kpit_value) && $kpit_value != 1 ) {
1412         my @kpit =
1413           split_words( $rOpts->{'keyword-paren-inner-tightness-list'} );
1414         unless (@kpit) {
1415             @kpit = qw(if elsif unless while until for foreach);    # defaults
1416         }
1417
1418         # we will allow keywords and user-defined identifiers
1419         foreach (@kpit) {
1420             $keyword_paren_inner_tightness{$_} = $kpit_value;
1421         }
1422     }
1423
1424     # implement user whitespace preferences
1425     if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) {
1426         @want_left_space{@q} = (1) x scalar(@q);
1427     }
1428
1429     if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) {
1430         @want_right_space{@q} = (1) x scalar(@q);
1431     }
1432
1433     if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) {
1434         @want_left_space{@q} = (-1) x scalar(@q);
1435     }
1436
1437     if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) {
1438         @want_right_space{@q} = (-1) x scalar(@q);
1439     }
1440     if ( $rOpts->{'dump-want-left-space'} ) {
1441         dump_want_left_space(*STDOUT);
1442         Exit(0);
1443     }
1444
1445     if ( $rOpts->{'dump-want-right-space'} ) {
1446         dump_want_right_space(*STDOUT);
1447         Exit(0);
1448     }
1449
1450     # default keywords for which space is introduced before an opening paren
1451     # (at present, including them messes up vertical alignment)
1452     my @sak = qw(my local our and or xor err eq ne if else elsif until
1453       unless while for foreach return switch case given when catch);
1454     %space_after_keyword = map { $_ => 1 } @sak;
1455
1456     # first remove any or all of these if desired
1457     if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
1458
1459         # -nsak='*' selects all the above keywords
1460         if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) }
1461         @space_after_keyword{@q} = (0) x scalar(@q);
1462     }
1463
1464     # then allow user to add to these defaults
1465     if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) {
1466         @space_after_keyword{@q} = (1) x scalar(@q);
1467     }
1468
1469     # implement user break preferences
1470     my $break_after = sub {
1471         my @toks = @_;
1472         foreach my $tok (@toks) {
1473             if ( $tok eq '?' ) { $tok = ':' }    # patch to coordinate ?/:
1474             my $lbs = $left_bond_strength{$tok};
1475             my $rbs = $right_bond_strength{$tok};
1476             if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
1477                 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
1478                   ( $lbs, $rbs );
1479             }
1480         }
1481         return;
1482     };
1483
1484     my $break_before = sub {
1485         my @toks = @_;
1486         foreach my $tok (@toks) {
1487             my $lbs = $left_bond_strength{$tok};
1488             my $rbs = $right_bond_strength{$tok};
1489             if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
1490                 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
1491                   ( $lbs, $rbs );
1492             }
1493         }
1494         return;
1495     };
1496
1497     $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
1498     $break_before->(@all_operators)
1499       if ( $rOpts->{'break-before-all-operators'} );
1500
1501     $break_after->( split_words( $rOpts->{'want-break-after'} ) );
1502     $break_before->( split_words( $rOpts->{'want-break-before'} ) );
1503
1504     # make note if breaks are before certain key types
1505     %want_break_before = ();
1506     foreach my $tok ( @all_operators, ',' ) {
1507         $want_break_before{$tok} =
1508           $left_bond_strength{$tok} < $right_bond_strength{$tok};
1509     }
1510
1511     # Coordinate ?/: breaks, which must be similar
1512     # The small strength 0.01 which is added is 1% of the strength of one
1513     # indentation level and seems to work okay.
1514     if ( !$want_break_before{':'} ) {
1515         $want_break_before{'?'}   = $want_break_before{':'};
1516         $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
1517         $left_bond_strength{'?'}  = NO_BREAK;
1518     }
1519
1520     # Only make a hash entry for the next parameters if values are defined.
1521     # That allows a quick check to be made later.
1522     %break_before_container_types = ();
1523     for ( $rOpts->{'break-before-hash-brace'} ) {
1524         $break_before_container_types{'{'} = $_ if $_ && $_ > 0;
1525     }
1526     for ( $rOpts->{'break-before-square-bracket'} ) {
1527         $break_before_container_types{'['} = $_ if $_ && $_ > 0;
1528     }
1529     for ( $rOpts->{'break-before-paren'} ) {
1530         $break_before_container_types{'('} = $_ if $_ && $_ > 0;
1531     }
1532
1533     #--------------------------------------------------------------
1534     # The combination -lp -iob -vmll -bbx=2 can be unstable (b1266)
1535     #--------------------------------------------------------------
1536     # The -vmll and -lp parameters do not really work well together.
1537     # To avoid instabilities, we will change any -bbx=2 to -bbx=1 (stable).
1538     # NOTE: we could make this more precise by looking at any exclusion
1539     # flags for -lp, and allowing -bbx=2 for excluded types.
1540     if (   $rOpts->{'variable-maximum-line-length'}
1541         && $rOpts->{'ignore-old-breakpoints'}
1542         && $rOpts->{'line-up-parentheses'} )
1543     {
1544         my @changed;
1545         foreach my $key ( keys %break_before_container_types ) {
1546             if ( $break_before_container_types{$key} == 2 ) {
1547                 $break_before_container_types{$key} = 1;
1548                 push @changed, $key;
1549             }
1550         }
1551         if (@changed) {
1552
1553             # we could write a warning here
1554         }
1555     }
1556
1557     #-----------------------------------------------------------
1558     # The combination -lp -vmll can be unstable if -ci<2 (b1267)
1559     #-----------------------------------------------------------
1560     # The -vmll and -lp parameters do not really work well together.
1561     # This is a very crude fix for an unusual parameter combination.
1562     if (   $rOpts->{'variable-maximum-line-length'}
1563         && $rOpts->{'line-up-parentheses'}
1564         && $rOpts->{'continuation-indentation'} < 2 )
1565     {
1566         $rOpts->{'continuation-indentation'} = 2;
1567         ##Warn("Increased -ci=n to n=2 for stability with -lp and -vmll\n");
1568     }
1569
1570     %container_indentation_options = ();
1571     foreach my $pair (
1572         [ 'break-before-hash-brace-and-indent',     '{' ],
1573         [ 'break-before-square-bracket-and-indent', '[' ],
1574         [ 'break-before-paren-and-indent',          '(' ],
1575       )
1576     {
1577         my ( $key, $tok ) = @{$pair};
1578         my $opt = $rOpts->{$key};
1579         if ( defined($opt) && $opt > 0 && $break_before_container_types{$tok} )
1580         {
1581
1582             # (1) -lp is not compatible with opt=2, silently set to opt=0
1583             # (2) opt=0 and 2 give same result if -i=-ci; but opt=0 is faster
1584             if ( $opt == 2 ) {
1585                 if (   $rOpts->{'line-up-parentheses'}
1586                     || $rOpts->{'indent-columns'} ==
1587                     $rOpts->{'continuation-indentation'} )
1588                 {
1589                     $opt = 0;
1590                 }
1591             }
1592             $container_indentation_options{$tok} = $opt;
1593         }
1594     }
1595
1596     # Define here tokens which may follow the closing brace of a do statement
1597     # on the same line, as in:
1598     #   } while ( $something);
1599     my @dof = qw(until while unless if ; : );
1600     push @dof, ',';
1601     @is_do_follower{@dof} = (1) x scalar(@dof);
1602
1603     # what can follow a multi-line anonymous sub definition closing curly:
1604     my @asf = qw# ; : => or and  && || ~~ !~~ ) #;
1605     push @asf, ',';
1606     @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf);
1607
1608     # what can follow a one-line anonymous sub closing curly:
1609     # one-line anonymous subs also have ']' here...
1610     # see tk3.t and PP.pm
1611     my @asf1 = qw#  ; : => or and  && || ) ] ~~ !~~ #;
1612     push @asf1, ',';
1613     @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1);
1614
1615     # What can follow a closing curly of a block
1616     # which is not an if/elsif/else/do/sort/map/grep/eval/sub
1617     # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
1618     my @obf = qw#  ; : => or and  && || ) #;
1619     push @obf, ',';
1620     @is_other_brace_follower{@obf} = (1) x scalar(@obf);
1621
1622     $right_bond_strength{'{'} = WEAK;
1623     $left_bond_strength{'{'}  = VERY_STRONG;
1624
1625     # make -l=0 equal to -l=infinite
1626     if ( !$rOpts->{'maximum-line-length'} ) {
1627         $rOpts->{'maximum-line-length'} = 1_000_000;
1628     }
1629
1630     # make -lbl=0 equal to -lbl=infinite
1631     if ( !$rOpts->{'long-block-line-count'} ) {
1632         $rOpts->{'long-block-line-count'} = 1_000_000;
1633     }
1634
1635     my $ole = $rOpts->{'output-line-ending'};
1636     if ($ole) {
1637         my %endings = (
1638             dos  => "\015\012",
1639             win  => "\015\012",
1640             mac  => "\015",
1641             unix => "\012",
1642         );
1643
1644         # Patch for RT #99514, a memoization issue.
1645         # Normally, the user enters one of 'dos', 'win', etc, and we change the
1646         # value in the options parameter to be the corresponding line ending
1647         # character.  But, if we are using memoization, on later passes through
1648         # here the option parameter will already have the desired ending
1649         # character rather than the keyword 'dos', 'win', etc.  So
1650         # we must check to see if conversion has already been done and, if so,
1651         # bypass the conversion step.
1652         my %endings_inverted = (
1653             "\015\012" => 'dos',
1654             "\015\012" => 'win',
1655             "\015"     => 'mac',
1656             "\012"     => 'unix',
1657         );
1658
1659         if ( defined( $endings_inverted{$ole} ) ) {
1660
1661             # we already have valid line ending, nothing more to do
1662         }
1663         else {
1664             $ole = lc $ole;
1665             unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
1666                 my $str = join SPACE, keys %endings;
1667                 Die(<<EOM);
1668 Unrecognized line ending '$ole'; expecting one of: $str
1669 EOM
1670             }
1671             if ( $rOpts->{'preserve-line-endings'} ) {
1672                 Warn("Ignoring -ple; conflicts with -ole\n");
1673                 $rOpts->{'preserve-line-endings'} = undef;
1674             }
1675         }
1676     }
1677
1678     # hashes used to simplify setting whitespace
1679     %tightness = (
1680         '{' => $rOpts->{'brace-tightness'},
1681         '}' => $rOpts->{'brace-tightness'},
1682         '(' => $rOpts->{'paren-tightness'},
1683         ')' => $rOpts->{'paren-tightness'},
1684         '[' => $rOpts->{'square-bracket-tightness'},
1685         ']' => $rOpts->{'square-bracket-tightness'},
1686     );
1687     %matching_token = (
1688         '{' => '}',
1689         '(' => ')',
1690         '[' => ']',
1691         '?' => ':',
1692     );
1693
1694     if ( $rOpts->{'ignore-old-breakpoints'} ) {
1695
1696         my @conflicts;
1697         if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
1698             $rOpts->{'break-at-old-method-breakpoints'} = 0;
1699             push @conflicts, '--break-at-old-method-breakpoints (-bom)';
1700         }
1701         if ( $rOpts->{'break-at-old-comma-breakpoints'} ) {
1702             $rOpts->{'break-at-old-comma-breakpoints'} = 0;
1703             push @conflicts, '--break-at-old-comma-breakpoints (-boc)';
1704         }
1705         if ( $rOpts->{'break-at-old-semicolon-breakpoints'} ) {
1706             $rOpts->{'break-at-old-semicolon-breakpoints'} = 0;
1707             push @conflicts, '--break-at-old-semicolon-breakpoints (-bos)';
1708         }
1709         if ( $rOpts->{'keep-old-breakpoints-before'} ) {
1710             $rOpts->{'keep-old-breakpoints-before'} = EMPTY_STRING;
1711             push @conflicts, '--keep-old-breakpoints-before (-kbb)';
1712         }
1713         if ( $rOpts->{'keep-old-breakpoints-after'} ) {
1714             $rOpts->{'keep-old-breakpoints-after'} = EMPTY_STRING;
1715             push @conflicts, '--keep-old-breakpoints-after (-kba)';
1716         }
1717
1718         if (@conflicts) {
1719             my $msg = join( "\n  ",
1720 " Conflict: These conflicts with --ignore-old-breakponts (-iob) will be turned off:",
1721                 @conflicts )
1722               . "\n";
1723             Warn($msg);
1724         }
1725
1726         # Note: These additional parameters are made inactive by -iob.
1727         # They are silently turned off here because they are on by default.
1728         # We would generate unexpected warnings if we issued a warning.
1729         $rOpts->{'break-at-old-keyword-breakpoints'}   = 0;
1730         $rOpts->{'break-at-old-logical-breakpoints'}   = 0;
1731         $rOpts->{'break-at-old-ternary-breakpoints'}   = 0;
1732         $rOpts->{'break-at-old-attribute-breakpoints'} = 0;
1733     }
1734
1735     %keep_break_before_type = ();
1736     initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-before'},
1737         'kbb', \%keep_break_before_type );
1738
1739     %keep_break_after_type = ();
1740     initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-after'},
1741         'kba', \%keep_break_after_type );
1742
1743     #------------------------------------------------------------
1744     # Make global vars for frequently used options for efficiency
1745     #------------------------------------------------------------
1746
1747     $rOpts_add_newlines   = $rOpts->{'add-newlines'};
1748     $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
1749     $rOpts_blank_lines_after_opening_block =
1750       $rOpts->{'blank-lines-after-opening-block'};
1751     $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
1752     $rOpts_block_brace_vertical_tightness =
1753       $rOpts->{'block-brace-vertical-tightness'};
1754     $rOpts_break_after_labels = $rOpts->{'break-after-labels'};
1755     $rOpts_break_at_old_attribute_breakpoints =
1756       $rOpts->{'break-at-old-attribute-breakpoints'};
1757     $rOpts_break_at_old_comma_breakpoints =
1758       $rOpts->{'break-at-old-comma-breakpoints'};
1759     $rOpts_break_at_old_keyword_breakpoints =
1760       $rOpts->{'break-at-old-keyword-breakpoints'};
1761     $rOpts_break_at_old_logical_breakpoints =
1762       $rOpts->{'break-at-old-logical-breakpoints'};
1763     $rOpts_break_at_old_semicolon_breakpoints =
1764       $rOpts->{'break-at-old-semicolon-breakpoints'};
1765     $rOpts_break_at_old_ternary_breakpoints =
1766       $rOpts->{'break-at-old-ternary-breakpoints'};
1767     $rOpts_break_open_compact_parens = $rOpts->{'break-open-compact-parens'};
1768     $rOpts_closing_side_comments     = $rOpts->{'closing-side-comments'};
1769     $rOpts_closing_side_comment_else_flag =
1770       $rOpts->{'closing-side-comment-else-flag'};
1771     $rOpts_closing_side_comment_maximum_text =
1772       $rOpts->{'closing-side-comment-maximum-text'};
1773     $rOpts_comma_arrow_breakpoints  = $rOpts->{'comma-arrow-breakpoints'};
1774     $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
1775     $rOpts_delete_closing_side_comments =
1776       $rOpts->{'delete-closing-side-comments'};
1777     $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
1778     $rOpts_extended_continuation_indentation =
1779       $rOpts->{'extended-continuation-indentation'};
1780     $rOpts_delete_side_comments = $rOpts->{'delete-side-comments'};
1781     $rOpts_format_skipping      = $rOpts->{'format-skipping'};
1782     $rOpts_freeze_whitespace    = $rOpts->{'freeze-whitespace'};
1783     $rOpts_function_paren_vertical_alignment =
1784       $rOpts->{'function-paren-vertical-alignment'};
1785     $rOpts_fuzzy_line_length      = $rOpts->{'fuzzy-line-length'};
1786     $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
1787     $rOpts_ignore_side_comment_lengths =
1788       $rOpts->{'ignore-side-comment-lengths'};
1789     $rOpts_indent_closing_brace     = $rOpts->{'indent-closing-brace'};
1790     $rOpts_indent_columns           = $rOpts->{'indent-columns'};
1791     $rOpts_indent_only              = $rOpts->{'indent-only'};
1792     $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
1793     $rOpts_line_up_parentheses      = $rOpts->{'line-up-parentheses'};
1794     $rOpts_extended_line_up_parentheses =
1795       $rOpts->{'extended-line-up-parentheses'};
1796     $rOpts_logical_padding = $rOpts->{'logical-padding'};
1797     $rOpts_maximum_consecutive_blank_lines =
1798       $rOpts->{'maximum-consecutive-blank-lines'};
1799     $rOpts_maximum_fields_per_table  = $rOpts->{'maximum-fields-per-table'};
1800     $rOpts_maximum_line_length       = $rOpts->{'maximum-line-length'};
1801     $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
1802     $rOpts_opening_brace_always_on_right =
1803       $rOpts->{'opening-brace-always-on-right'};
1804     $rOpts_outdent_keywords      = $rOpts->{'outdent-keywords'};
1805     $rOpts_outdent_labels        = $rOpts->{'outdent-labels'};
1806     $rOpts_outdent_long_comments = $rOpts->{'outdent-long-comments'};
1807     $rOpts_outdent_long_quotes   = $rOpts->{'outdent-long-quotes'};
1808     $rOpts_outdent_static_block_comments =
1809       $rOpts->{'outdent-static-block-comments'};
1810     $rOpts_recombine = $rOpts->{'recombine'};
1811     $rOpts_short_concatenation_item_length =
1812       $rOpts->{'short-concatenation-item-length'};
1813     $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
1814     $rOpts_static_block_comments     = $rOpts->{'static-block-comments'};
1815     $rOpts_sub_alias_list            = $rOpts->{'sub-alias-list'};
1816     $rOpts_tee_block_comments        = $rOpts->{'tee-block-comments'};
1817     $rOpts_tee_pod                   = $rOpts->{'tee-pod'};
1818     $rOpts_tee_side_comments         = $rOpts->{'tee-side-comments'};
1819     $rOpts_valign                    = $rOpts->{'valign'};
1820     $rOpts_valign_code               = $rOpts->{'valign-code'};
1821     $rOpts_valign_side_comments      = $rOpts->{'valign-side-comments'};
1822     $rOpts_variable_maximum_line_length =
1823       $rOpts->{'variable-maximum-line-length'};
1824
1825     # Note that both opening and closing tokens can access the opening
1826     # and closing flags of their container types.
1827     %opening_vertical_tightness = (
1828         '(' => $rOpts->{'paren-vertical-tightness'},
1829         '{' => $rOpts->{'brace-vertical-tightness'},
1830         '[' => $rOpts->{'square-bracket-vertical-tightness'},
1831         ')' => $rOpts->{'paren-vertical-tightness'},
1832         '}' => $rOpts->{'brace-vertical-tightness'},
1833         ']' => $rOpts->{'square-bracket-vertical-tightness'},
1834     );
1835
1836     %closing_vertical_tightness = (
1837         '(' => $rOpts->{'paren-vertical-tightness-closing'},
1838         '{' => $rOpts->{'brace-vertical-tightness-closing'},
1839         '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
1840         ')' => $rOpts->{'paren-vertical-tightness-closing'},
1841         '}' => $rOpts->{'brace-vertical-tightness-closing'},
1842         ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
1843     );
1844
1845     # assume flag for '>' same as ')' for closing qw quotes
1846     %closing_token_indentation = (
1847         ')' => $rOpts->{'closing-paren-indentation'},
1848         '}' => $rOpts->{'closing-brace-indentation'},
1849         ']' => $rOpts->{'closing-square-bracket-indentation'},
1850         '>' => $rOpts->{'closing-paren-indentation'},
1851     );
1852
1853     # flag indicating if any closing tokens are indented
1854     $some_closing_token_indentation =
1855          $rOpts->{'closing-paren-indentation'}
1856       || $rOpts->{'closing-brace-indentation'}
1857       || $rOpts->{'closing-square-bracket-indentation'}
1858       || $rOpts->{'indent-closing-brace'};
1859
1860     %opening_token_right = (
1861         '(' => $rOpts->{'opening-paren-right'},
1862         '{' => $rOpts->{'opening-hash-brace-right'},
1863         '[' => $rOpts->{'opening-square-bracket-right'},
1864     );
1865
1866     %stack_opening_token = (
1867         '(' => $rOpts->{'stack-opening-paren'},
1868         '{' => $rOpts->{'stack-opening-hash-brace'},
1869         '[' => $rOpts->{'stack-opening-square-bracket'},
1870     );
1871
1872     %stack_closing_token = (
1873         ')' => $rOpts->{'stack-closing-paren'},
1874         '}' => $rOpts->{'stack-closing-hash-brace'},
1875         ']' => $rOpts->{'stack-closing-square-bracket'},
1876     );
1877
1878     # Create a table of maximum line length vs level for later efficient use.
1879     # We will make the tables very long to be sure it will not be exceeded.
1880     # But we have to choose a fixed length.  A check will be made at the start
1881     # of sub 'finish_formatting' to be sure it is not exceeded.  Note, some of
1882     # my standard test problems have indentation levels of about 150, so this
1883     # should be fairly large.  If the choice of a maximum level ever becomes
1884     # an issue then these table values could be returned in a sub with a simple
1885     # memoization scheme.
1886
1887     # Also create a table of the maximum spaces available for text due to the
1888     # level only.  If a line has continuation indentation, then that space must
1889     # be subtracted from the table value.  This table is used for preliminary
1890     # estimates in welding, extended_ci, BBX, and marking short blocks.
1891     use constant LEVEL_TABLE_MAX => 1000;
1892
1893     # The basic scheme:
1894     foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
1895         my $indent = $level * $rOpts_indent_columns;
1896         $maximum_line_length_at_level[$level] = $rOpts_maximum_line_length;
1897         $maximum_text_length_at_level[$level] =
1898           $rOpts_maximum_line_length - $indent;
1899     }
1900
1901     # Correct the maximum_text_length table if the -wc=n flag is used
1902     $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
1903     if ($rOpts_whitespace_cycle) {
1904         if ( $rOpts_whitespace_cycle > 0 ) {
1905             foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
1906                 my $level_mod = $level % $rOpts_whitespace_cycle;
1907                 my $indent    = $level_mod * $rOpts_indent_columns;
1908                 $maximum_text_length_at_level[$level] =
1909                   $rOpts_maximum_line_length - $indent;
1910             }
1911         }
1912         else {
1913             $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'} = 0;
1914         }
1915     }
1916
1917     # Correct the tables if the -vmll flag is used.  These values override the
1918     # previous values.
1919     if ($rOpts_variable_maximum_line_length) {
1920         foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
1921             $maximum_text_length_at_level[$level] = $rOpts_maximum_line_length;
1922             $maximum_line_length_at_level[$level] =
1923               $rOpts_maximum_line_length + $level * $rOpts_indent_columns;
1924         }
1925     }
1926
1927     # Define two measures of indentation level, alpha and beta, at which some
1928     # formatting features come under stress and need to start shutting down.
1929     # Some combination of the two will be used to shut down different
1930     # formatting features.
1931     # Put a reasonable upper limit on stress level (say 100) in case the
1932     # whitespace-cycle variable is used.
1933     my $stress_level_limit = min( 100, LEVEL_TABLE_MAX );
1934
1935     # Find stress_level_alpha, targeted at very short maximum line lengths.
1936     $stress_level_alpha = $stress_level_limit + 1;
1937     foreach my $level_test ( 0 .. $stress_level_limit ) {
1938         my $max_len = $maximum_text_length_at_level[ $level_test + 1 ];
1939         my $excess_inside_space =
1940           $max_len -
1941           $rOpts_continuation_indentation -
1942           $rOpts_indent_columns - 8;
1943         if ( $excess_inside_space <= 0 ) {
1944             $stress_level_alpha = $level_test;
1945             last;
1946         }
1947     }
1948
1949     # Find stress level beta, a stress level targeted at formatting
1950     # at deep levels near the maximum line length.  We start increasing
1951     # from zero and stop at the first level which shows no more space.
1952
1953     # 'const' is a fixed number of spaces for a typical variable.
1954     # Cases b1197-b1204 work ok with const=12 but not with const=8
1955     my $const = 16;
1956     my $denom = max( 1, $rOpts_indent_columns );
1957     $stress_level_beta = 0;
1958     foreach my $level ( 0 .. $stress_level_limit ) {
1959         my $remaining_cycles = max(
1960             0,
1961             (
1962                 $maximum_text_length_at_level[$level] -
1963                   $rOpts_continuation_indentation - $const
1964             ) / $denom
1965         );
1966         last if ( $remaining_cycles <= 3 );    # 2 does not work
1967         $stress_level_beta = $level;
1968     }
1969
1970     initialize_weld_nested_exclusion_rules();
1971
1972     %line_up_parentheses_control_hash    = ();
1973     $line_up_parentheses_control_is_lxpl = 1;
1974     my $lpxl = $rOpts->{'line-up-parentheses-exclusion-list'};
1975     my $lpil = $rOpts->{'line-up-parentheses-inclusion-list'};
1976     if ( $lpxl && $lpil ) {
1977         Warn( <<EOM );
1978 You entered values for both -lpxl=s and -lpil=s; the -lpil list will be ignored
1979 EOM
1980     }
1981     if ($lpxl) {
1982         $line_up_parentheses_control_is_lxpl = 1;
1983         initialize_line_up_parentheses_control_hash(
1984             $rOpts->{'line-up-parentheses-exclusion-list'}, 'lpxl' );
1985     }
1986     elsif ($lpil) {
1987         $line_up_parentheses_control_is_lxpl = 0;
1988         initialize_line_up_parentheses_control_hash(
1989             $rOpts->{'line-up-parentheses-inclusion-list'}, 'lpil' );
1990     }
1991
1992     return;
1993 } ## end sub check_options
1994
1995 use constant ALIGN_GREP_ALIASES => 0;
1996
1997 sub initialize_grep_and_friends {
1998     my ($str) = @_;
1999
2000     # Initialize or re-initialize hashes with 'grep' and grep aliases. This
2001     # must be done after each set of options because new grep aliases may be
2002     # used.
2003
2004     # re-initialize the hash ... this is critical!
2005     %is_sort_map_grep = ();
2006
2007     my @q = qw(sort map grep);
2008     @is_sort_map_grep{@q} = (1) x scalar(@q);
2009
2010     # Note that any 'grep-alias-list' string has been preprocessed to be a
2011     # trimmed, space-separated list.
2012     my @grep_aliases = split /\s+/, $str;
2013     @{is_sort_map_grep}{@grep_aliases} = (1) x scalar(@grep_aliases);
2014
2015     ##@q = qw(sort map grep eval);
2016     %is_sort_map_grep_eval = %is_sort_map_grep;
2017     $is_sort_map_grep_eval{'eval'} = 1;
2018
2019     ##@q = qw(sort map grep eval do);
2020     %is_sort_map_grep_eval_do = %is_sort_map_grep_eval;
2021     $is_sort_map_grep_eval_do{'do'} = 1;
2022
2023     # These block types can take ci.  This is used by the -xci option.
2024     # Note that the 'sub' in this list is an anonymous sub.  To be more correct
2025     # we could remove sub and use ASUB pattern to also handle a
2026     # prototype/signature.  But that would slow things down and would probably
2027     # never be useful.
2028     ##@q = qw( do sub eval sort map grep );
2029     %is_block_with_ci = %is_sort_map_grep_eval_do;
2030     $is_block_with_ci{'sub'} = 1;
2031
2032     %is_keyword_returning_list = ();
2033     @q                         = qw(
2034       grep
2035       keys
2036       map
2037       reverse
2038       sort
2039       split
2040     );
2041     push @q, @grep_aliases;
2042     @is_keyword_returning_list{@q} = (1) x scalar(@q);
2043
2044     # This code enables vertical alignment of grep aliases for testing.  It has
2045     # not been found to be beneficial, so it is off by default.  But it is
2046     # useful for precise testing of the grep alias coding.
2047     if (ALIGN_GREP_ALIASES) {
2048         %block_type_map = (
2049             'unless'  => 'if',
2050             'else'    => 'if',
2051             'elsif'   => 'if',
2052             'when'    => 'if',
2053             'default' => 'if',
2054             'case'    => 'if',
2055             'sort'    => 'map',
2056             'grep'    => 'map',
2057         );
2058         foreach (@q) {
2059             $block_type_map{$_} = 'map' unless ( $_ eq 'map' );
2060         }
2061     }
2062     return;
2063 } ## end sub initialize_grep_and_friends
2064
2065 sub initialize_weld_nested_exclusion_rules {
2066     %weld_nested_exclusion_rules = ();
2067
2068     my $opt_name = 'weld-nested-exclusion-list';
2069     my $str      = $rOpts->{$opt_name};
2070     return unless ($str);
2071     $str =~ s/^\s+//;
2072     $str =~ s/\s+$//;
2073     return unless ($str);
2074
2075     # There are four container tokens.
2076     my %token_keys = (
2077         '(' => '(',
2078         '[' => '[',
2079         '{' => '{',
2080         'q' => 'q',
2081     );
2082
2083     # We are parsing an exclusion list for nested welds. The list is a string
2084     # with spaces separating any number of items.  Each item consists of three
2085     # pieces of information:
2086     # <optional position> <optional type> <type of container>
2087     # <     ^ or .      > <    k or K   > <     ( [ {       >
2088
2089     # The last character is the required container type and must be one of:
2090     # ( = paren
2091     # [ = square bracket
2092     # { = brace
2093
2094     # An optional leading position indicator:
2095     # ^ means the leading token position in the weld
2096     # . means a secondary token position in the weld
2097     #   no position indicator means all positions match
2098
2099     # An optional alphanumeric character between the position and container
2100     # token selects to which the rule applies:
2101     # k = any keyword
2102     # K = any non-keyword
2103     # f = function call
2104     # F = not a function call
2105     # w = function or keyword
2106     # W = not a function or keyword
2107     #     no letter means any preceding type matches
2108
2109     # Examples:
2110     # ^(  - the weld must not start with a paren
2111     # .(  - the second and later tokens may not be parens
2112     # (   - no parens in weld
2113     # ^K(  - exclude a leading paren not preceded by a keyword
2114     # .k(  - exclude a secondary paren preceded by a keyword
2115     # [ {  - exclude all brackets and braces
2116
2117     my @items = split /\s+/, $str;
2118     my $msg1;
2119     my $msg2;
2120     foreach my $item (@items) {
2121         my $item_save = $item;
2122         my $tok       = chop($item);
2123         my $key       = $token_keys{$tok};
2124         if ( !defined($key) ) {
2125             $msg1 .= " '$item_save'";
2126             next;
2127         }
2128         if ( !defined( $weld_nested_exclusion_rules{$key} ) ) {
2129             $weld_nested_exclusion_rules{$key} = [];
2130         }
2131         my $rflags = $weld_nested_exclusion_rules{$key};
2132
2133         # A 'q' means do not weld quotes
2134         if ( $tok eq 'q' ) {
2135             $rflags->[0] = '*';
2136             $rflags->[1] = '*';
2137             next;
2138         }
2139
2140         my $pos    = '*';
2141         my $select = '*';
2142         if ($item) {
2143             if ( $item =~ /^([\^\.])?([kKfFwW])?$/ ) {
2144                 $pos    = $1 if ($1);
2145                 $select = $2 if ($2);
2146             }
2147             else {
2148                 $msg1 .= " '$item_save'";
2149                 next;
2150             }
2151         }
2152
2153         my $err;
2154         if ( $pos eq '^' || $pos eq '*' ) {
2155             if ( defined( $rflags->[0] ) && $rflags->[0] ne $select ) {
2156                 $err = 1;
2157             }
2158             $rflags->[0] = $select;
2159         }
2160         if ( $pos eq '.' || $pos eq '*' ) {
2161             if ( defined( $rflags->[1] ) && $rflags->[1] ne $select ) {
2162                 $err = 1;
2163             }
2164             $rflags->[1] = $select;
2165         }
2166         if ($err) { $msg2 .= " '$item_save'"; }
2167     }
2168     if ($msg1) {
2169         Warn(<<EOM);
2170 Unexpecting symbol(s) encountered in --$opt_name will be ignored:
2171 $msg1
2172 EOM
2173     }
2174     if ($msg2) {
2175         Warn(<<EOM);
2176 Multiple specifications were encountered in the --weld-nested-exclusion-list for:
2177 $msg2
2178 Only the last will be used.
2179 EOM
2180     }
2181     return;
2182 } ## end sub initialize_weld_nested_exclusion_rules
2183
2184 sub initialize_line_up_parentheses_control_hash {
2185     my ( $str, $opt_name ) = @_;
2186     return unless ($str);
2187     $str =~ s/^\s+//;
2188     $str =~ s/\s+$//;
2189     return unless ($str);
2190
2191     # The format is space separated items, where each item must consist of a
2192     # string with a token type preceded by an optional text token and followed
2193     # by an integer:
2194     # For example:
2195     #    W(1
2196     #  = (flag1)(key)(flag2), where
2197     #    flag1 = 'W'
2198     #    key = '('
2199     #    flag2 = '1'
2200
2201     my @items = split /\s+/, $str;
2202     my $msg1;
2203     my $msg2;
2204     foreach my $item (@items) {
2205         my $item_save = $item;
2206         my ( $flag1, $key, $flag2 );
2207         if ( $item =~ /^([^\(\]\{]*)?([\(\{\[])(\d)?$/ ) {
2208             $flag1 = $1 if $1;
2209             $key   = $2 if $2;
2210             $flag2 = $3 if $3;
2211         }
2212         else {
2213             $msg1 .= " '$item_save'";
2214             next;
2215         }
2216
2217         if ( !defined($key) ) {
2218             $msg1 .= " '$item_save'";
2219             next;
2220         }
2221
2222         # Check for valid flag1
2223         if    ( !defined($flag1) ) { $flag1 = '*' }
2224         elsif ( $flag1 !~ /^[kKfFwW\*]$/ ) {
2225             $msg1 .= " '$item_save'";
2226             next;
2227         }
2228
2229         # Check for valid flag2
2230         # 0 or blank: ignore container contents
2231         # 1 all containers with sublists match
2232         # 2 all containers with sublists, code blocks or ternary operators match
2233         # ... this could be extended in the future
2234         if    ( !defined($flag2) ) { $flag2 = 0 }
2235         elsif ( $flag2 !~ /^[012]$/ ) {
2236             $msg1 .= " '$item_save'";
2237             next;
2238         }
2239
2240         if ( !defined( $line_up_parentheses_control_hash{$key} ) ) {
2241             $line_up_parentheses_control_hash{$key} = [ $flag1, $flag2 ];
2242             next;
2243         }
2244
2245         # check for multiple conflicting specifications
2246         my $rflags = $line_up_parentheses_control_hash{$key};
2247         my $err;
2248         if ( defined( $rflags->[0] ) && $rflags->[0] ne $flag1 ) {
2249             $err = 1;
2250             $rflags->[0] = $flag1;
2251         }
2252         if ( defined( $rflags->[1] ) && $rflags->[1] ne $flag2 ) {
2253             $err = 1;
2254             $rflags->[1] = $flag2;
2255         }
2256         $msg2 .= " '$item_save'" if ($err);
2257         next;
2258     }
2259     if ($msg1) {
2260         Warn(<<EOM);
2261 Unexpecting symbol(s) encountered in --$opt_name will be ignored:
2262 $msg1
2263 EOM
2264     }
2265     if ($msg2) {
2266         Warn(<<EOM);
2267 Multiple specifications were encountered in the $opt_name at:
2268 $msg2
2269 Only the last will be used.
2270 EOM
2271     }
2272
2273     # Speedup: we can turn off -lp if it is not actually used
2274     if ($line_up_parentheses_control_is_lxpl) {
2275         my $all_off = 1;
2276         foreach my $key (qw# ( { [ #) {
2277             my $rflags = $line_up_parentheses_control_hash{$key};
2278             if ( defined($rflags) ) {
2279                 my ( $flag1, $flag2 ) = @{$rflags};
2280                 if ( $flag1 && $flag1 ne '*' ) { $all_off = 0; last }
2281                 if ($flag2)                    { $all_off = 0; last }
2282             }
2283         }
2284         if ($all_off) {
2285             $rOpts->{'line-up-parentheses'} = EMPTY_STRING;
2286         }
2287     }
2288
2289     return;
2290 } ## end sub initialize_line_up_parentheses_control_hash
2291
2292 use constant DEBUG_KB => 0;
2293
2294 sub initialize_keep_old_breakpoints {
2295     my ( $str, $short_name, $rkeep_break_hash ) = @_;
2296     return unless $str;
2297
2298     my %flags = ();
2299     my @list  = split_words($str);
2300     if ( DEBUG_KB && @list ) {
2301         local $LIST_SEPARATOR = SPACE;
2302         print <<EOM;
2303 DEBUG_KB entering for '$short_name' with str=$str\n";
2304 list is: @list;
2305 EOM
2306     }
2307
2308     # Ignore kbb='(' and '[' and '{': can cause unstable math formatting
2309     # (issues b1346, b1347, b1348) and likewise ignore kba=')' and ']' and '}'
2310     if ( $short_name eq 'kbb' ) {
2311         @list = grep { !m/[\(\[\{]/ } @list;
2312     }
2313     elsif ( $short_name eq 'kba' ) {
2314         @list = grep { !m/[\)\]\}]/ } @list;
2315     }
2316
2317     # pull out any any leading container code, like f( or *{
2318     # For example: 'f(' becomes flags hash entry '(' => 'f'
2319     foreach my $item (@list) {
2320         if ( $item =~ /^( [ \w\* ] )( [ \{\(\[\}\)\] ] )$/x ) {
2321             $item = $2;
2322             $flags{$2} = $1;
2323         }
2324     }
2325
2326     my @unknown_types;
2327     foreach my $type (@list) {
2328         if ( !Perl::Tidy::Tokenizer::is_valid_token_type($type) ) {
2329             push @unknown_types, $type;
2330         }
2331     }
2332
2333     if (@unknown_types) {
2334         my $num = @unknown_types;
2335         local $LIST_SEPARATOR = SPACE;
2336         Warn(<<EOM);
2337 $num unrecognized token types were input with --$short_name :
2338 @unknown_types
2339 EOM
2340     }
2341
2342     @{$rkeep_break_hash}{@list} = (1) x scalar(@list);
2343
2344     foreach my $key ( keys %flags ) {
2345         my $flag = $flags{$key};
2346
2347         if ( length($flag) != 1 ) {
2348             Warn(<<EOM);
2349 Multiple entries given for '$key' in '$short_name'
2350 EOM
2351         }
2352         elsif ( ( $key eq '(' || $key eq ')' ) && $flag !~ /^[kKfFwW\*]$/ ) {
2353             Warn(<<EOM);
2354 Unknown flag '$flag' given for '$key' in '$short_name'
2355 EOM
2356         }
2357         elsif ( ( $key eq '}' || $key eq '}' ) && $flag !~ /^[bB\*]$/ ) {
2358             Warn(<<EOM);
2359 Unknown flag '$flag' given for '$key' in '$short_name'
2360 EOM
2361         }
2362
2363         $rkeep_break_hash->{$key} = $flag;
2364     }
2365
2366     if ( DEBUG_KB && @list ) {
2367         my @tmp = %flags;
2368         local $LIST_SEPARATOR = SPACE;
2369         print <<EOM;
2370
2371 DEBUG_KB -$short_name flag: $str
2372 final keys:  @list
2373 special flags:  @tmp
2374 EOM
2375
2376     }
2377
2378     return;
2379
2380 } ## end sub initialize_keep_old_breakpoints
2381
2382 sub initialize_whitespace_hashes {
2383
2384     # This is called once before formatting begins to initialize these global
2385     # hashes, which control the use of whitespace around tokens:
2386     #
2387     # %binary_ws_rules
2388     # %want_left_space
2389     # %want_right_space
2390     # %space_after_keyword
2391     #
2392     # Many token types are identical to the tokens themselves.
2393     # See the tokenizer for a complete list. Here are some special types:
2394     #   k = perl keyword
2395     #   f = semicolon in for statement
2396     #   m = unary minus
2397     #   p = unary plus
2398     # Note that :: is excluded since it should be contained in an identifier
2399     # Note that '->' is excluded because it never gets space
2400     # parentheses and brackets are excluded since they are handled specially
2401     # curly braces are included but may be overridden by logic, such as
2402     # newline logic.
2403
2404     # NEW_TOKENS: create a whitespace rule here.  This can be as
2405     # simple as adding your new letter to @spaces_both_sides, for
2406     # example.
2407
2408     my @opening_type = qw< L { ( [ >;
2409     @is_opening_type{@opening_type} = (1) x scalar(@opening_type);
2410
2411     my @closing_type = qw< R } ) ] >;
2412     @is_closing_type{@closing_type} = (1) x scalar(@closing_type);
2413
2414     my @spaces_both_sides = qw#
2415       + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
2416       .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
2417       &&= ||= //= <=> A k f w F n C Y U G v
2418       #;
2419
2420     my @spaces_left_side = qw<
2421       t ! ~ m p { \ h pp mm Z j
2422     >;
2423     push( @spaces_left_side, '#' );    # avoids warning message
2424
2425     my @spaces_right_side = qw<
2426       ; } ) ] R J ++ -- **=
2427     >;
2428     push( @spaces_right_side, ',' );    # avoids warning message
2429
2430     %want_left_space  = ();
2431     %want_right_space = ();
2432     %binary_ws_rules  = ();
2433
2434     # Note that we setting defaults here.  Later in processing
2435     # the values of %want_left_space and  %want_right_space
2436     # may be overridden by any user settings specified by the
2437     # -wls and -wrs parameters.  However the binary_whitespace_rules
2438     # are hardwired and have priority.
2439     @want_left_space{@spaces_both_sides} =
2440       (1) x scalar(@spaces_both_sides);
2441     @want_right_space{@spaces_both_sides} =
2442       (1) x scalar(@spaces_both_sides);
2443     @want_left_space{@spaces_left_side} =
2444       (1) x scalar(@spaces_left_side);
2445     @want_right_space{@spaces_left_side} =
2446       (-1) x scalar(@spaces_left_side);
2447     @want_left_space{@spaces_right_side} =
2448       (-1) x scalar(@spaces_right_side);
2449     @want_right_space{@spaces_right_side} =
2450       (1) x scalar(@spaces_right_side);
2451     $want_left_space{'->'}      = WS_NO;
2452     $want_right_space{'->'}     = WS_NO;
2453     $want_left_space{'**'}      = WS_NO;
2454     $want_right_space{'**'}     = WS_NO;
2455     $want_right_space{'CORE::'} = WS_NO;
2456
2457     # These binary_ws_rules are hardwired and have priority over the above
2458     # settings.  It would be nice to allow adjustment by the user,
2459     # but it would be complicated to specify.
2460     #
2461     # hash type information must stay tightly bound
2462     # as in :  ${xxxx}
2463     $binary_ws_rules{'i'}{'L'} = WS_NO;
2464     $binary_ws_rules{'i'}{'{'} = WS_YES;
2465     $binary_ws_rules{'k'}{'{'} = WS_YES;
2466     $binary_ws_rules{'U'}{'{'} = WS_YES;
2467     $binary_ws_rules{'i'}{'['} = WS_NO;
2468     $binary_ws_rules{'R'}{'L'} = WS_NO;
2469     $binary_ws_rules{'R'}{'{'} = WS_NO;
2470     $binary_ws_rules{'t'}{'L'} = WS_NO;
2471     $binary_ws_rules{'t'}{'{'} = WS_NO;
2472     $binary_ws_rules{'t'}{'='} = WS_OPTIONAL;    # for signatures; fixes b1123
2473     $binary_ws_rules{'}'}{'L'} = WS_NO;
2474     $binary_ws_rules{'}'}{'{'} = WS_OPTIONAL;    # RT#129850; was WS_NO
2475     $binary_ws_rules{'$'}{'L'} = WS_NO;
2476     $binary_ws_rules{'$'}{'{'} = WS_NO;
2477     $binary_ws_rules{'@'}{'L'} = WS_NO;
2478     $binary_ws_rules{'@'}{'{'} = WS_NO;
2479     $binary_ws_rules{'='}{'L'} = WS_YES;
2480     $binary_ws_rules{'J'}{'J'} = WS_YES;
2481
2482     # the following includes ') {'
2483     # as in :    if ( xxx ) { yyy }
2484     $binary_ws_rules{']'}{'L'} = WS_NO;
2485     $binary_ws_rules{']'}{'{'} = WS_NO;
2486     $binary_ws_rules{')'}{'{'} = WS_YES;
2487     $binary_ws_rules{')'}{'['} = WS_NO;
2488     $binary_ws_rules{']'}{'['} = WS_NO;
2489     $binary_ws_rules{']'}{'{'} = WS_NO;
2490     $binary_ws_rules{'}'}{'['} = WS_NO;
2491     $binary_ws_rules{'R'}{'['} = WS_NO;
2492
2493     $binary_ws_rules{']'}{'++'} = WS_NO;
2494     $binary_ws_rules{']'}{'--'} = WS_NO;
2495     $binary_ws_rules{')'}{'++'} = WS_NO;
2496     $binary_ws_rules{')'}{'--'} = WS_NO;
2497
2498     $binary_ws_rules{'R'}{'++'} = WS_NO;
2499     $binary_ws_rules{'R'}{'--'} = WS_NO;
2500
2501     $binary_ws_rules{'i'}{'Q'} = WS_YES;
2502     $binary_ws_rules{'n'}{'('} = WS_YES;    # occurs in 'use package n ()'
2503
2504     $binary_ws_rules{'i'}{'('} = WS_NO;
2505
2506     $binary_ws_rules{'w'}{'('} = WS_NO;
2507     $binary_ws_rules{'w'}{'{'} = WS_YES;
2508     return;
2509
2510 } ## end sub initialize_whitespace_hashes
2511
2512 my %is_special_ws_type;
2513 my %is_wCUG;
2514 my %is_wi;
2515
2516 BEGIN {
2517
2518     # The following hash is used to skip over needless if tests.
2519     # Be sure to update it when adding new checks in its block.
2520     my @q = qw(k w i C m - Q);
2521     push @q, '#';
2522     @is_special_ws_type{@q} = (1) x scalar(@q);
2523
2524     # These hashes replace slower regex tests
2525     @q = qw( w C U G );
2526     @is_wCUG{@q} = (1) x scalar(@q);
2527
2528     @q = qw( w i );
2529     @is_wi{@q} = (1) x scalar(@q);
2530 }
2531
2532 use constant DEBUG_WHITE => 0;
2533
2534 sub set_whitespace_flags {
2535
2536     # This routine is called once per file to set whitespace flags for that
2537     # file.  This routine examines each pair of nonblank tokens and sets a flag
2538     # indicating if white space is needed.
2539     #
2540     # $rwhitespace_flags->[$j] is a flag indicating whether a white space
2541     # BEFORE token $j is needed, with the following values:
2542     #
2543     #             WS_NO      = -1 do not want a space BEFORE token $j
2544     #             WS_OPTIONAL=  0 optional space or $j is a whitespace
2545     #             WS_YES     =  1 want a space BEFORE token $j
2546     #
2547
2548     my $self = shift;
2549
2550     my $rLL                  = $self->[_rLL_];
2551     my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
2552     my $jmax                 = @{$rLL} - 1;
2553
2554     my $rOpts_space_keyword_paren   = $rOpts->{'space-keyword-paren'};
2555     my $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
2556     my $rOpts_space_function_paren  = $rOpts->{'space-function-paren'};
2557
2558     my $rwhitespace_flags       = [];
2559     my $ris_function_call_paren = {};
2560
2561     return $rwhitespace_flags if ( $jmax < 0 );
2562
2563     my %is_for_foreach = ( 'for' => 1, 'foreach' => 1 );
2564
2565     my ( $rtokh, $token, $type );
2566     my $rtokh_last      = $rLL->[0];
2567     my $rtokh_last_last = $rtokh_last;
2568
2569     my $last_type  = EMPTY_STRING;
2570     my $last_token = EMPTY_STRING;
2571
2572     my $j_tight_closing_paren = -1;
2573
2574     $rtokh = [ @{ $rLL->[0] } ];
2575     $token = SPACE;
2576     $type  = 'b';
2577
2578     $rtokh->[_TOKEN_]         = $token;
2579     $rtokh->[_TYPE_]          = $type;
2580     $rtokh->[_TYPE_SEQUENCE_] = EMPTY_STRING;
2581     $rtokh->[_LINE_INDEX_]    = 0;
2582
2583     # This is some logic moved to a sub to avoid deep nesting of if stmts
2584     my $ws_in_container = sub {
2585
2586         my ($j) = @_;
2587         my $ws = WS_YES;
2588         if ( $j + 1 > $jmax ) { return (WS_NO) }
2589
2590         # Patch to count '-foo' as single token so that
2591         # each of  $a{-foo} and $a{foo} and $a{'foo'} do
2592         # not get spaces with default formatting.
2593         my $j_here = $j;
2594         ++$j_here
2595           if ( $token eq '-'
2596             && $last_token eq '{'
2597             && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
2598
2599         # Patch to count a sign separated from a number as a single token, as
2600         # in the following line. Otherwise, it takes two steps to converge:
2601         #    deg2rad(-  0.5)
2602         if (   ( $type eq 'm' || $type eq 'p' )
2603             && $j < $jmax + 1
2604             && $rLL->[ $j + 1 ]->[_TYPE_] eq 'b'
2605             && $rLL->[ $j + 2 ]->[_TYPE_] eq 'n'
2606             && $rLL->[ $j + 2 ]->[_TOKEN_] =~ /^\d/ )
2607         {
2608             $j_here = $j + 2;
2609         }
2610
2611         # $j_next is where a closing token should be if
2612         # the container has a single token
2613         if ( $j_here + 1 > $jmax ) { return (WS_NO) }
2614         my $j_next =
2615           ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
2616           ? $j_here + 2
2617           : $j_here + 1;
2618
2619         if ( $j_next > $jmax ) { return WS_NO }
2620         my $tok_next  = $rLL->[$j_next]->[_TOKEN_];
2621         my $type_next = $rLL->[$j_next]->[_TYPE_];
2622
2623         # for tightness = 1, if there is just one token
2624         # within the matching pair, we will keep it tight
2625         if (
2626             $tok_next eq $matching_token{$last_token}
2627
2628             # but watch out for this: [ [ ]    (misc.t)
2629             && $last_token ne $token
2630
2631             # double diamond is usually spaced
2632             && $token ne '<<>>'
2633
2634           )
2635         {
2636
2637             # remember where to put the space for the closing paren
2638             $j_tight_closing_paren = $j_next;
2639             return (WS_NO);
2640         }
2641         return (WS_YES);
2642     };
2643
2644     # Local hashes to set spaces around container tokens according to their
2645     # sequence numbers.  These are set as keywords are examined.
2646     # They are controlled by the -kpit and -kpitl flags.
2647     my %opening_container_inside_ws;
2648     my %closing_container_inside_ws;
2649     my $set_container_ws_by_keyword = sub {
2650
2651         return unless (%keyword_paren_inner_tightness);
2652
2653         my ( $word, $sequence_number ) = @_;
2654
2655         # We just saw a keyword (or other function name) followed by an opening
2656         # paren. Now check to see if the following paren should have special
2657         # treatment for its inside space.  If so we set a hash value using the
2658         # sequence number as key.
2659         if ( $word && $sequence_number ) {
2660             my $tightness = $keyword_paren_inner_tightness{$word};
2661             if ( defined($tightness) && $tightness != 1 ) {
2662                 my $ws_flag = $tightness == 0 ? WS_YES : WS_NO;
2663                 $opening_container_inside_ws{$sequence_number} = $ws_flag;
2664                 $closing_container_inside_ws{$sequence_number} = $ws_flag;
2665             }
2666         }
2667         return;
2668     };
2669
2670     my ( $ws_1, $ws_2, $ws_3, $ws_4 );
2671
2672     # main loop over all tokens to define the whitespace flags
2673     foreach my $j ( 0 .. $jmax ) {
2674
2675         if ( $rLL->[$j]->[_TYPE_] eq 'b' ) {
2676             $rwhitespace_flags->[$j] = WS_OPTIONAL;
2677             next;
2678         }
2679
2680         $rtokh_last_last = $rtokh_last;
2681
2682         $rtokh_last = $rtokh;
2683         $last_token = $token;
2684         $last_type  = $type;
2685
2686         $rtokh = $rLL->[$j];
2687         $token = $rtokh->[_TOKEN_];
2688         $type  = $rtokh->[_TYPE_];
2689
2690         my $ws;
2691
2692         #---------------------------------------------------------------
2693         # Whitespace Rules Section 1:
2694         # Handle space on the inside of opening braces.
2695         #---------------------------------------------------------------
2696
2697         #    /^[L\{\(\[]$/
2698         if ( $is_opening_type{$last_type} ) {
2699
2700             my $seqno           = $rtokh->[_TYPE_SEQUENCE_];
2701             my $block_type      = $rblock_type_of_seqno->{$seqno};
2702             my $last_seqno      = $rtokh_last->[_TYPE_SEQUENCE_];
2703             my $last_block_type = $rblock_type_of_seqno->{$last_seqno};
2704
2705             $j_tight_closing_paren = -1;
2706
2707             # let us keep empty matched braces together: () {} []
2708             # except for BLOCKS
2709             if ( $token eq $matching_token{$last_token} ) {
2710                 if ($block_type) {
2711                     $ws = WS_YES;
2712                 }
2713                 else {
2714                     $ws = WS_NO;
2715                 }
2716             }
2717             else {
2718
2719                 # we're considering the right of an opening brace
2720                 # tightness = 0 means always pad inside with space
2721                 # tightness = 1 means pad inside if "complex"
2722                 # tightness = 2 means never pad inside with space
2723
2724                 my $tightness;
2725                 if (   $last_type eq '{'
2726                     && $last_token eq '{'
2727                     && $last_block_type )
2728                 {
2729                     $tightness = $rOpts_block_brace_tightness;
2730                 }
2731                 else { $tightness = $tightness{$last_token} }
2732
2733                 #=============================================================
2734                 # Patch for test problem <<snippets/fabrice_bug.in>>
2735                 # We must always avoid spaces around a bare word beginning
2736                 # with ^ as in:
2737                 #    my $before = ${^PREMATCH};
2738                 # Because all of the following cause an error in perl:
2739                 #    my $before = ${ ^PREMATCH };
2740                 #    my $before = ${ ^PREMATCH};
2741                 #    my $before = ${^PREMATCH };
2742                 # So if brace tightness flag is -bt=0 we must temporarily reset
2743                 # to bt=1.  Note that here we must set tightness=1 and not 2 so
2744                 # that the closing space is also avoided
2745                 # (via the $j_tight_closing_paren flag in coding)
2746                 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
2747
2748                 #=============================================================
2749
2750                 if ( $tightness <= 0 ) {
2751                     $ws = WS_YES;
2752                 }
2753                 elsif ( $tightness > 1 ) {
2754                     $ws = WS_NO;
2755                 }
2756                 else {
2757                     $ws = $ws_in_container->($j);
2758                 }
2759             }
2760
2761             # check for special cases which override the above rules
2762             if ( %opening_container_inside_ws && $last_seqno ) {
2763                 my $ws_override = $opening_container_inside_ws{$last_seqno};
2764                 if ($ws_override) { $ws = $ws_override }
2765             }
2766
2767             $ws_4 = $ws_3 = $ws_2 = $ws_1 = $ws
2768               if DEBUG_WHITE;
2769
2770         } ## end setting space flag inside opening tokens
2771
2772         #---------------------------------------------------------------
2773         # Whitespace Rules Section 2:
2774         # Special checks for certain types ...
2775         #---------------------------------------------------------------
2776         # The hash '%is_special_ws_type' significantly speeds up this routine,
2777         # but be sure to update it if a new check is added.
2778         # Currently has types: qw(k w i C m - Q #)
2779         if ( $is_special_ws_type{$type} ) {
2780             if ( $type eq 'i' ) {
2781
2782                 # never a space before ->
2783                 if ( substr( $token, 0, 2 ) eq '->' ) {
2784                     $ws = WS_NO;
2785                 }
2786             }
2787
2788             elsif ( $type eq 'k' ) {
2789
2790                 # Keywords 'for', 'foreach' are special cases for -kpit since
2791                 # the opening paren does not always immediately follow the
2792                 # keyword. So we have to search forward for the paren in this
2793                 # case.  I have limited the search to 10 tokens ahead, just in
2794                 # case somebody has a big file and no opening paren.  This
2795                 # should be enough for all normal code. Added the level check
2796                 # to fix b1236.
2797                 if (   $is_for_foreach{$token}
2798                     && %keyword_paren_inner_tightness
2799                     && defined( $keyword_paren_inner_tightness{$token} )
2800                     && $j < $jmax )
2801                 {
2802                     my $level = $rLL->[$j]->[_LEVEL_];
2803                     my $jp    = $j;
2804                     ## NOTE: we might use the KNEXT variable to avoid this loop
2805                     ## but profiling shows that little would be saved
2806                     foreach my $inc ( 1 .. 9 ) {
2807                         $jp++;
2808                         last if ( $jp > $jmax );
2809                         last if ( $rLL->[$jp]->[_LEVEL_] != $level );    # b1236
2810                         next unless ( $rLL->[$jp]->[_TOKEN_] eq '(' );
2811                         my $seqno_p = $rLL->[$jp]->[_TYPE_SEQUENCE_];
2812                         $set_container_ws_by_keyword->( $token, $seqno_p );
2813                         last;
2814                     }
2815                 }
2816             }
2817
2818             # retain any space between '-' and bare word
2819             elsif ( $type eq 'w' || $type eq 'C' ) {
2820                 $ws = WS_OPTIONAL if $last_type eq '-';
2821
2822                 # never a space before ->
2823                 if ( substr( $token, 0, 2 ) eq '->' ) {
2824                     $ws = WS_NO;
2825                 }
2826             }
2827
2828             # retain any space between '-' and bare word; for example
2829             # avoid space between 'USER' and '-' here: <<snippets/space2.in>>
2830             #   $myhash{USER-NAME}='steve';
2831             elsif ( $type eq 'm' || $type eq '-' ) {
2832                 $ws = WS_OPTIONAL if ( $last_type eq 'w' );
2833             }
2834
2835             # always space before side comment
2836             elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
2837
2838             # space_backslash_quote; RT #123774  <<snippets/rt123774.in>>
2839             # allow a space between a backslash and single or double quote
2840             # to avoid fooling html formatters
2841             elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ )
2842             {
2843                 if ($rOpts_space_backslash_quote) {
2844                     if ( $rOpts_space_backslash_quote == 1 ) {
2845                         $ws = WS_OPTIONAL;
2846                     }
2847                     elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
2848                     else { }    # shouldnt happen
2849                 }
2850                 else {
2851                     $ws = WS_NO;
2852                 }
2853             }
2854         } ## end elsif ( $is_special_ws_type{$type} ...
2855
2856         #---------------------------------------------------------------
2857         # Whitespace Rules Section 3:
2858         # Handle space on inside of closing brace pairs.
2859         #---------------------------------------------------------------
2860
2861         #   /[\}\)\]R]/
2862         elsif ( $is_closing_type{$type} ) {
2863
2864             my $seqno = $rtokh->[_TYPE_SEQUENCE_];
2865             if ( $j == $j_tight_closing_paren ) {
2866
2867                 $j_tight_closing_paren = -1;
2868                 $ws                    = WS_NO;
2869             }
2870             else {
2871
2872                 if ( !defined($ws) ) {
2873
2874                     my $tightness;
2875                     my $block_type = $rblock_type_of_seqno->{$seqno};
2876                     if ( $type eq '}' && $token eq '}' && $block_type ) {
2877                         $tightness = $rOpts_block_brace_tightness;
2878                     }
2879                     else { $tightness = $tightness{$token} }
2880
2881                     $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
2882                 }
2883             }
2884
2885             # check for special cases which override the above rules
2886             if ( %closing_container_inside_ws && $seqno ) {
2887                 my $ws_override = $closing_container_inside_ws{$seqno};
2888                 if ($ws_override) { $ws = $ws_override }
2889             }
2890
2891             $ws_4 = $ws_3 = $ws_2 = $ws
2892               if DEBUG_WHITE;
2893         } ## end setting space flag inside closing tokens
2894
2895         #---------------------------------------------------------------
2896         # Whitespace Rules Section 4:
2897         #---------------------------------------------------------------
2898         #    /^[L\{\(\[]$/
2899         elsif ( $is_opening_type{$type} ) {
2900
2901             if ( $token eq '(' ) {
2902
2903                 my $seqno = $rtokh->[_TYPE_SEQUENCE_];
2904
2905                 # This will have to be tweaked as tokenization changes.
2906                 # We usually want a space at '} (', for example:
2907                 # <<snippets/space1.in>>
2908                 #     map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
2909                 #
2910                 # But not others:
2911                 #     &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
2912                 # At present, the above & block is marked as type L/R so this
2913                 # case won't go through here.
2914                 if ( $last_type eq '}' && $last_token ne ')' ) { $ws = WS_YES }
2915
2916                 # NOTE: some older versions of Perl had occasional problems if
2917                 # spaces are introduced between keywords or functions and
2918                 # opening parens.  So the default is not to do this except is
2919                 # certain cases.  The current Perl seems to tolerate spaces.
2920
2921                 # Space between keyword and '('
2922                 elsif ( $last_type eq 'k' ) {
2923                     $ws = WS_NO
2924                       unless ( $rOpts_space_keyword_paren
2925                         || $space_after_keyword{$last_token} );
2926
2927                     # Set inside space flag if requested
2928                     $set_container_ws_by_keyword->( $last_token, $seqno );
2929                 }
2930
2931                 # Space between function and '('
2932                 # -----------------------------------------------------
2933                 # 'w' and 'i' checks for something like:
2934                 #   myfun(    &myfun(   ->myfun(
2935                 # -----------------------------------------------------
2936
2937                 # Note that at this point an identifier may still have a
2938                 # leading arrow, but the arrow will be split off during token
2939                 # respacing.  After that, the token may become a bare word
2940                 # without leading arrow.  The point is, it is best to mark
2941                 # function call parens right here before that happens.
2942                 # Patch: added 'C' to prevent blinker, case b934, i.e. 'pi()'
2943                 # NOTE: this would be the place to allow spaces between
2944                 # repeated parens, like () () (), as in case c017, but I
2945                 # decided that would not be a good idea.
2946                 elsif (
2947                     ##$last_type =~ /^[wCUG]$/
2948                     $is_wCUG{$last_type}
2949                     || (
2950                         ##$last_type =~ /^[wi]$/
2951                         $is_wi{$last_type}
2952
2953                         && (
2954                             $last_token =~ /^([\&]|->)/
2955
2956                             # or -> or & split from bareword by newline (b1337)
2957                             || (
2958                                 $last_token =~ /^\w/
2959                                 && (
2960                                     $rtokh_last_last->[_TYPE_] eq '->'
2961                                     || (   $rtokh_last_last->[_TYPE_] eq 't'
2962                                         && $rtokh_last_last->[_TOKEN_] =~
2963                                         /^\&\s*$/ )
2964                                 )
2965                             )
2966                         )
2967                     )
2968                   )
2969                 {
2970                     $ws = $rOpts_space_function_paren ? WS_YES : WS_NO;
2971                     $set_container_ws_by_keyword->( $last_token, $seqno );
2972                     $ris_function_call_paren->{$seqno} = 1;
2973                 }
2974
2975                 # space between something like $i and ( in 'snippets/space2.in'
2976                 # for $i ( 0 .. 20 ) {
2977                 # FIXME: eventually, type 'i' could be split into multiple
2978                 # token types so this can be a hardwired rule.
2979                 elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
2980                     $ws = WS_YES;
2981                 }
2982
2983                 # allow constant function followed by '()' to retain no space
2984                 elsif ($last_type eq 'C'
2985                     && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
2986                 {
2987                     $ws = WS_NO;
2988                 }
2989             }
2990
2991             # patch for SWITCH/CASE: make space at ']{' optional
2992             # since the '{' might begin a case or when block
2993             elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
2994                 $ws = WS_OPTIONAL;
2995             }
2996
2997             # keep space between 'sub' and '{' for anonymous sub definition
2998             if ( $type eq '{' ) {
2999                 if ( $last_token eq 'sub' ) {
3000                     $ws = WS_YES;
3001                 }
3002
3003                 # this is needed to avoid no space in '){'
3004                 if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
3005
3006                 # avoid any space before the brace or bracket in something like
3007                 #  @opts{'a','b',...}
3008                 if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
3009                     $ws = WS_NO;
3010                 }
3011             }
3012         } ## end if ( $is_opening_type{$type} ) {
3013
3014         # always preserver whatever space was used after a possible
3015         # filehandle (except _) or here doc operator
3016         if (
3017             $type ne '#'
3018             && ( ( $last_type eq 'Z' && $last_token ne '_' )
3019                 || $last_type eq 'h' )
3020           )
3021         {
3022             $ws = WS_OPTIONAL;
3023         }
3024
3025         $ws_4 = $ws_3 = $ws
3026           if DEBUG_WHITE;
3027
3028         if ( !defined($ws) ) {
3029
3030             #---------------------------------------------------------------
3031             # Whitespace Rules Section 4:
3032             # Use the binary rule table.
3033             #---------------------------------------------------------------
3034             $ws   = $binary_ws_rules{$last_type}{$type};
3035             $ws_4 = $ws if DEBUG_WHITE;
3036
3037             #---------------------------------------------------------------
3038             # Whitespace Rules Section 5:
3039             # Apply default rules not covered above.
3040             #---------------------------------------------------------------
3041
3042             # If we fall through to here, look at the pre-defined hash tables
3043             # for the two tokens, and:
3044             #  if (they are equal) use the common value
3045             #  if (either is zero or undef) use the other
3046             #  if (either is -1) use it
3047             # That is,
3048             # left  vs right
3049             #  1    vs    1     -->  1
3050             #  0    vs    0     -->  0
3051             # -1    vs   -1     --> -1
3052             #
3053             #  0    vs   -1     --> -1
3054             #  0    vs    1     -->  1
3055             #  1    vs    0     -->  1
3056             # -1    vs    0     --> -1
3057             #
3058             # -1    vs    1     --> -1
3059             #  1    vs   -1     --> -1
3060             if ( !defined($ws) ) {
3061                 my $wl = $want_left_space{$type};
3062                 my $wr = $want_right_space{$last_type};
3063                 if ( !defined($wl) ) {
3064                     $ws = defined($wr) ? $wr : 0;
3065                 }
3066                 elsif ( !defined($wr) ) {
3067                     $ws = $wl;
3068                 }
3069                 else {
3070                     $ws =
3071                       ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
3072                 }
3073             }
3074         }
3075
3076         # Treat newline as a whitespace. Otherwise, we might combine
3077         # 'Send' and '-recipients' here according to the above rules:
3078         # <<snippets/space3.in>>
3079         #    my $msg = new Fax::Send
3080         #      -recipients => $to,
3081         #      -data => $data;
3082         if (   $ws == 0
3083             && $rtokh->[_LINE_INDEX_] != $rtokh_last->[_LINE_INDEX_] )
3084         {
3085             $ws = 1;
3086         }
3087
3088         $rwhitespace_flags->[$j] = $ws;
3089
3090         if (DEBUG_WHITE) {
3091             my $str = substr( $last_token, 0, 15 );
3092             $str .= SPACE x ( 16 - length($str) );
3093             if ( !defined($ws_1) ) { $ws_1 = "*" }
3094             if ( !defined($ws_2) ) { $ws_2 = "*" }
3095             if ( !defined($ws_3) ) { $ws_3 = "*" }
3096             if ( !defined($ws_4) ) { $ws_4 = "*" }
3097             print STDOUT
3098 "NEW WHITE:  i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
3099
3100             # reset for next pass
3101             $ws_1 = $ws_2 = $ws_3 = $ws_4 = undef;
3102         }
3103     } ## end main loop
3104
3105     if ( $rOpts->{'tight-secret-operators'} ) {
3106         new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
3107     }
3108     $self->[_ris_function_call_paren_] = $ris_function_call_paren;
3109     return $rwhitespace_flags;
3110
3111 } ## end sub set_whitespace_flags
3112
3113 sub dump_want_left_space {
3114     my $fh = shift;
3115     local $LIST_SEPARATOR = "\n";
3116     $fh->print(<<EOM);
3117 These values are the main control of whitespace to the left of a token type;
3118 They may be altered with the -wls parameter.
3119 For a list of token types, use perltidy --dump-token-types (-dtt)
3120  1 means the token wants a space to its left
3121 -1 means the token does not want a space to its left
3122 ------------------------------------------------------------------------
3123 EOM
3124     foreach my $key ( sort keys %want_left_space ) {
3125         $fh->print("$key\t$want_left_space{$key}\n");
3126     }
3127     return;
3128 } ## end sub dump_want_left_space
3129
3130 sub dump_want_right_space {
3131     my $fh = shift;
3132     local $LIST_SEPARATOR = "\n";
3133     $fh->print(<<EOM);
3134 These values are the main control of whitespace to the right of a token type;
3135 They may be altered with the -wrs parameter.
3136 For a list of token types, use perltidy --dump-token-types (-dtt)
3137  1 means the token wants a space to its right
3138 -1 means the token does not want a space to its right
3139 ------------------------------------------------------------------------
3140 EOM
3141     foreach my $key ( sort keys %want_right_space ) {
3142         $fh->print("$key\t$want_right_space{$key}\n");
3143     }
3144     return;
3145 } ## end sub dump_want_right_space
3146
3147 {    ## begin closure is_essential_whitespace
3148
3149     my %is_sort_grep_map;
3150     my %is_for_foreach;
3151     my %is_digraph;
3152     my %is_trigraph;
3153     my %essential_whitespace_filter_l1;
3154     my %essential_whitespace_filter_r1;
3155     my %essential_whitespace_filter_l2;
3156     my %essential_whitespace_filter_r2;
3157     my %is_type_with_space_before_bareword;
3158     my %is_special_variable_char;
3159
3160     BEGIN {
3161
3162         my @q;
3163
3164         # NOTE: This hash is like the global %is_sort_map_grep, but it ignores
3165         # grep aliases on purpose, since here we are looking parens, not braces
3166         @q = qw(sort grep map);
3167         @is_sort_grep_map{@q} = (1) x scalar(@q);
3168
3169         @q = qw(for foreach);
3170         @is_for_foreach{@q} = (1) x scalar(@q);
3171
3172         @q = qw(
3173           .. :: << >> ** && || // -> => += -= .= %= &= |= ^= *= <>
3174           <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
3175         );
3176         @is_digraph{@q} = (1) x scalar(@q);
3177
3178         @q = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~);
3179         @is_trigraph{@q} = (1) x scalar(@q);
3180
3181         # These are used as a speedup filters for sub is_essential_whitespace.
3182
3183         # Filter 1:
3184         # These left side token types USUALLY do not require a space:
3185         @q = qw( ; { } [ ] L R );
3186         push @q, ',';
3187         push @q, ')';
3188         push @q, '(';
3189         @essential_whitespace_filter_l1{@q} = (1) x scalar(@q);
3190
3191         # BUT some might if followed by these right token types
3192         @q = qw( pp mm << <<= h );
3193         @essential_whitespace_filter_r1{@q} = (1) x scalar(@q);
3194
3195         # Filter 2:
3196         # These right side filters usually do not require a space
3197         @q = qw( ; ] R } );
3198         push @q, ',';
3199         push @q, ')';
3200         @essential_whitespace_filter_r2{@q} = (1) x scalar(@q);
3201
3202         # BUT some might if followed by these left token types
3203         @q = qw( h Z );
3204         @essential_whitespace_filter_l2{@q} = (1) x scalar(@q);
3205
3206         # Keep a space between certain types and any bareword:
3207         # Q: keep a space between a quote and a bareword to prevent the
3208         #    bareword from becoming a quote modifier.
3209         # &: do not remove space between an '&' and a bare word because
3210         #    it may turn into a function evaluation, like here
3211         #    between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
3212         #      $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
3213         @q = qw( Q & );
3214         @is_type_with_space_before_bareword{@q} = (1) x scalar(@q);
3215
3216         # These are the only characters which can (currently) form special
3217         # variables, like $^W: (issue c066, c068).
3218         @q =
3219           qw{ ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ };
3220         @{is_special_variable_char}{@q} = (1) x scalar(@q);
3221
3222     }
3223
3224     sub is_essential_whitespace {
3225
3226         # Essential whitespace means whitespace which cannot be safely deleted
3227         # without risking the introduction of a syntax error.
3228         # We are given three tokens and their types:
3229         # ($tokenl, $typel) is the token to the left of the space in question
3230         # ($tokenr, $typer) is the token to the right of the space in question
3231         # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
3232         #
3233         # Note1: This routine should almost never need to be changed.  It is
3234         # for avoiding syntax problems rather than for formatting.
3235
3236         # Note2: The -mangle option causes large numbers of calls to this
3237         # routine and therefore is a good test. So if a change is made, be sure
3238         # to use nytprof to profile with both old and reviesed coding using the
3239         # -mangle option and check differences.
3240
3241         my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
3242
3243         # This is potentially a very slow routine but the following quick
3244         # filters typically catch and handle over 90% of the calls.
3245
3246         # Filter 1: usually no space required after common types ; , [ ] { } ( )
3247         return
3248           if ( $essential_whitespace_filter_l1{$typel}
3249             && !$essential_whitespace_filter_r1{$typer} );
3250
3251         # Filter 2: usually no space before common types ; ,
3252         return
3253           if ( $essential_whitespace_filter_r2{$typer}
3254             && !$essential_whitespace_filter_l2{$typel} );
3255
3256         # Filter 3: Handle side comments: a space is only essential if the left
3257         # token ends in '$' For example, we do not want to create $#foo below:
3258
3259         #   sub t086
3260         #       ( #foo)))
3261         #       $ #foo)))
3262         #       a #foo)))
3263         #       ) #foo)))
3264         #       { ... }
3265
3266         # Also, I prefer not to put a ? and # together because ? used to be
3267         # a pattern delimiter and spacing was used if guessing was needed.
3268
3269         if ( $typer eq '#' ) {
3270
3271             return 1
3272               if ( $tokenl
3273                 && ( $typel eq '?' || substr( $tokenl, -1 ) eq '$' ) );
3274             return;
3275         }
3276
3277         my $tokenr_is_bareword   = $tokenr =~ /^\w/ && $tokenr !~ /^\d/;
3278         my $tokenr_is_open_paren = $tokenr eq '(';
3279         my $token_joined         = $tokenl . $tokenr;
3280         my $tokenl_is_dash       = $tokenl eq '-';
3281
3282         my $result =
3283
3284           # never combine two bare words or numbers
3285           # examples:  and ::ok(1)
3286           #            return ::spw(...)
3287           #            for bla::bla:: abc
3288           # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
3289           #            $input eq"quit" to make $inputeq"quit"
3290           #            my $size=-s::SINK if $file;  <==OK but we won't do it
3291           # don't join something like: for bla::bla:: abc
3292           # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
3293           (      ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
3294               && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
3295
3296           # do not combine a number with a concatenation dot
3297           # example: pom.caputo:
3298           # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
3299           || $typel eq 'n' && $tokenr eq '.'
3300           || $typer eq 'n' && $tokenl eq '.'
3301
3302           # cases of a space before a bareword...
3303           || (
3304             $tokenr_is_bareword && (
3305
3306                 # do not join a minus with a bare word, because you might form
3307                 # a file test operator.  Example from Complex.pm:
3308                 # if (CORE::abs($z - i) < $eps);
3309                 # "z-i" would be taken as a file test.
3310                 $tokenl_is_dash && length($tokenr) == 1
3311
3312                 # and something like this could become ambiguous without space
3313                 # after the '-':
3314                 #   use constant III=>1;
3315                 #   $a = $b - III;
3316                 # and even this:
3317                 #   $a = - III;
3318                 || $tokenl_is_dash && $typer =~ /^[wC]$/
3319
3320                 # keep space between types Q & and a bareword
3321                 || $is_type_with_space_before_bareword{$typel}
3322
3323                 # +-: binary plus and minus before a bareword could get
3324                 # converted into unary plus and minus on next pass through the
3325                 # tokenizer. This can lead to blinkers: cases b660 b670 b780
3326                 # b781 b787 b788 b790 So we keep a space unless the +/- clearly
3327                 # follows an operator
3328                 || ( ( $typel eq '+' || $typel eq '-' )
3329                     && $typell !~ /^[niC\)\}\]R]$/ )
3330
3331                 # keep a space between a token ending in '$' and any word;
3332                 # this caused trouble:  "die @$ if $@"
3333                 || $typel eq 'i' && substr( $tokenl, -1, 1 ) eq '$'
3334
3335                 # don't combine $$ or $# with any alphanumeric
3336                 # (testfile mangle.t with --mangle)
3337                 || $tokenl eq '$$'
3338                 || $tokenl eq '$#'
3339
3340             )
3341           )    ## end $tokenr_is_bareword
3342
3343           # OLD, not used
3344           # '= -' should not become =- or you will get a warning
3345           # about reversed -=
3346           # || ($tokenr eq '-')
3347
3348           # do not join a bare word with a minus, like between 'Send' and
3349           # '-recipients' here <<snippets/space3.in>>
3350           #   my $msg = new Fax::Send
3351           #     -recipients => $to,
3352           #     -data => $data;
3353           # This is the safest thing to do. If we had the token to the right of
3354           # the minus we could do a better check.
3355           #
3356           # And do not combine a bareword and a quote, like this:
3357           #    oops "Your login, $Bad_Login, is not valid";
3358           # It can cause a syntax error if oops is a sub
3359           || $typel eq 'w' && ( $tokenr eq '-' || $typer eq 'Q' )
3360
3361           # perl is very fussy about spaces before <<
3362           || substr( $tokenr, 0, 2 ) eq '<<'
3363
3364           # avoid combining tokens to create new meanings. Example:
3365           #     $a+ +$b must not become $a++$b
3366           || ( $is_digraph{$token_joined} )
3367           || $is_trigraph{$token_joined}
3368
3369           # another example: do not combine these two &'s:
3370           #     allow_options & &OPT_EXECCGI
3371           || $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) }
3372
3373           # retain any space after possible filehandle
3374           # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
3375           || $typel eq 'Z'
3376
3377           # Added 'Y' here 16 Jan 2021 to prevent -mangle option from removing
3378           # space after type Y. Otherwise, it will get parsed as type 'Z' later
3379           # and any space would have to be added back manually if desired.
3380           || $typel eq 'Y'
3381
3382           # Perl is sensitive to whitespace after the + here:
3383           #  $b = xvals $a + 0.1 * yvals $a;
3384           || $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/
3385
3386           || (
3387             $tokenr_is_open_paren && (
3388
3389                 # keep paren separate in 'use Foo::Bar ()'
3390                 ( $typel eq 'w' && $typell eq 'k' && $tokenll eq 'use' )
3391
3392                 # OLD: keep any space between filehandle and paren:
3393                 # file mangle.t with --mangle:
3394                 # NEW: this test is no longer necessary here (moved above)
3395                 ## || $typel eq 'Y'
3396
3397                 # must have space between grep and left paren; "grep(" will fail
3398                 || $is_sort_grep_map{$tokenl}
3399
3400                 # don't stick numbers next to left parens, as in:
3401                 #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
3402                 || $typel eq 'n'
3403             )
3404           )    ## end $tokenr_is_open_paren
3405
3406           # retain any space after here doc operator ( hereerr.t)
3407           || $typel eq 'h'
3408
3409           # be careful with a space around ++ and --, to avoid ambiguity as to
3410           # which token it applies
3411           || ( $typer eq 'pp' || $typer eq 'mm' ) && $tokenl !~ /^[\;\{\(\[]/
3412           || ( $typel eq '++' || $typel eq '--' )
3413           && $tokenr !~ /^[\;\}\)\]]/
3414
3415           # need space after foreach my; for example, this will fail in
3416           # older versions of Perl:
3417           # foreach my$ft(@filetypes)...
3418           || (
3419             $tokenl eq 'my'
3420
3421             && substr( $tokenr, 0, 1 ) eq '$'
3422
3423             #  /^(for|foreach)$/
3424             && $is_for_foreach{$tokenll}
3425           )
3426
3427           # Keep space after like $^ if needed to avoid forming a different
3428           # special variable (issue c068). For example:
3429           #       my $aa = $^ ? "none" : "ok";
3430           || ( $typel eq 'i'
3431             && length($tokenl) == 2
3432             && substr( $tokenl, 1, 1 ) eq '^'
3433             && $is_special_variable_char{ substr( $tokenr, 0, 1 ) } )
3434
3435           # We must be sure that a space between a ? and a quoted string
3436           # remains if the space before the ? remains.  [Loca.pm, lockarea]
3437           # ie,
3438           #    $b=join $comma ? ',' : ':', @_;  # ok
3439           #    $b=join $comma?',' : ':', @_;    # ok!
3440           #    $b=join $comma ?',' : ':', @_;   # error!
3441           # Not really required:
3442           ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
3443
3444           # Space stacked labels...
3445           # Not really required: Perl seems to accept non-spaced labels.
3446           ## || $typel eq 'J' && $typer eq 'J'
3447
3448           ;    # the value of this long logic sequence is the result we want
3449         return $result;
3450     } ## end sub is_essential_whitespace
3451 } ## end closure is_essential_whitespace
3452
3453 {    ## begin closure new_secret_operator_whitespace
3454
3455     my %secret_operators;
3456     my %is_leading_secret_token;
3457
3458     BEGIN {
3459
3460         # token lists for perl secret operators as compiled by Philippe Bruhat
3461         # at: https://metacpan.org/module/perlsecret
3462         %secret_operators = (
3463             'Goatse'             => [qw#= ( ) =#],        #=( )=
3464             'Venus1'             => [qw#0 +#],            # 0+
3465             'Venus2'             => [qw#+ 0#],            # +0
3466             'Enterprise'         => [qw#) x ! !#],        # ()x!!
3467             'Kite1'              => [qw#~ ~ <>#],         # ~~<>
3468             'Kite2'              => [qw#~~ <>#],          # ~~<>
3469             'Winking Fat Comma'  => [ ( ',', '=>' ) ],    # ,=>
3470             'Bang bang         ' => [qw#! !#],            # !!
3471         );
3472
3473         # The following operators and constants are not included because they
3474         # are normally kept tight by perltidy:
3475         # ~~ <~>
3476         #
3477
3478         # Make a lookup table indexed by the first token of each operator:
3479         # first token => [list, list, ...]
3480         foreach my $value ( values(%secret_operators) ) {
3481             my $tok = $value->[0];
3482             push @{ $is_leading_secret_token{$tok} }, $value;
3483         }
3484     }
3485
3486     sub new_secret_operator_whitespace {
3487
3488         my ( $rlong_array, $rwhitespace_flags ) = @_;
3489
3490         # Loop over all tokens in this line
3491         my ( $token, $type );
3492         my $jmax = @{$rlong_array} - 1;
3493         foreach my $j ( 0 .. $jmax ) {
3494
3495             $token = $rlong_array->[$j]->[_TOKEN_];
3496             $type  = $rlong_array->[$j]->[_TYPE_];
3497
3498             # Skip unless this token might start a secret operator
3499             next if ( $type eq 'b' );
3500             next unless ( $is_leading_secret_token{$token} );
3501
3502             #      Loop over all secret operators with this leading token
3503             foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
3504                 my $jend = $j - 1;
3505                 foreach my $tok ( @{$rpattern} ) {
3506                     $jend++;
3507                     $jend++
3508
3509                       if ( $jend <= $jmax
3510                         && $rlong_array->[$jend]->[_TYPE_] eq 'b' );
3511                     if (   $jend > $jmax
3512                         || $tok ne $rlong_array->[$jend]->[_TOKEN_] )
3513                     {
3514                         $jend = undef;
3515                         last;
3516                     }
3517                 }
3518
3519                 if ($jend) {
3520
3521                     # set flags to prevent spaces within this operator
3522                     foreach my $jj ( $j + 1 .. $jend ) {
3523                         $rwhitespace_flags->[$jj] = WS_NO;
3524                     }
3525                     $j = $jend;
3526                     last;
3527                 }
3528             }    ##      End Loop over all operators
3529         }    ## End loop over all tokens
3530         return;
3531     }    # End sub
3532 } ## end closure new_secret_operator_whitespace
3533
3534 {    ## begin closure set_bond_strengths
3535
3536     # These routines and variables are involved in deciding where to break very
3537     # long lines.
3538
3539     my %is_good_keyword_breakpoint;
3540     my %is_lt_gt_le_ge;
3541     my %is_container_token;
3542
3543     my %binary_bond_strength_nospace;
3544     my %binary_bond_strength;
3545     my %nobreak_lhs;
3546     my %nobreak_rhs;
3547
3548     my @bias_tokens;
3549     my %bias_hash;
3550     my %bias;
3551     my $delta_bias;
3552
3553     sub initialize_bond_strength_hashes {
3554
3555         my @q;
3556         @q = qw(if unless while until for foreach);
3557         @is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
3558
3559         @q = qw(lt gt le ge);
3560         @is_lt_gt_le_ge{@q} = (1) x scalar(@q);
3561
3562         @q = qw/ ( [ { } ] ) /;
3563         @is_container_token{@q} = (1) x scalar(@q);
3564
3565         # The decision about where to break a line depends upon a "bond
3566         # strength" between tokens.  The LOWER the bond strength, the MORE
3567         # likely a break.  A bond strength may be any value but to simplify
3568         # things there are several pre-defined strength levels:
3569
3570         #    NO_BREAK    => 10000;
3571         #    VERY_STRONG => 100;
3572         #    STRONG      => 2.1;
3573         #    NOMINAL     => 1.1;
3574         #    WEAK        => 0.8;
3575         #    VERY_WEAK   => 0.55;
3576
3577         # The strength values are based on trial-and-error, and need to be
3578         # tweaked occasionally to get desired results.  Some comments:
3579         #
3580         #   1. Only relative strengths are important.  small differences
3581         #      in strengths can make big formatting differences.
3582         #   2. Each indentation level adds one unit of bond strength.
3583         #   3. A value of NO_BREAK makes an unbreakable bond
3584         #   4. A value of VERY_WEAK is the strength of a ','
3585         #   5. Values below NOMINAL are considered ok break points.
3586         #   6. Values above NOMINAL are considered poor break points.
3587         #
3588         # The bond strengths should roughly follow precedence order where
3589         # possible.  If you make changes, please check the results very
3590         # carefully on a variety of scripts.  Testing with the -extrude
3591         # options is particularly helpful in exercising all of the rules.
3592
3593         # Wherever possible, bond strengths are defined in the following
3594         # tables.  There are two main stages to setting bond strengths and
3595         # two types of tables:
3596         #
3597         # The first stage involves looking at each token individually and
3598         # defining left and right bond strengths, according to if we want
3599         # to break to the left or right side, and how good a break point it
3600         # is.  For example tokens like =, ||, && make good break points and
3601         # will have low strengths, but one might want to break on either
3602         # side to put them at the end of one line or beginning of the next.
3603         #
3604         # The second stage involves looking at certain pairs of tokens and
3605         # defining a bond strength for that particular pair.  This second
3606         # stage has priority.
3607
3608         #---------------------------------------------------------------
3609         # Bond Strength BEGIN Section 1.
3610         # Set left and right bond strengths of individual tokens.
3611         #---------------------------------------------------------------
3612
3613         # NOTE: NO_BREAK's set in this section first are HINTS which will
3614         # probably not be honored. Essential NO_BREAKS's should be set in
3615         # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
3616         # of this subroutine.
3617
3618         # Note that we are setting defaults in this section.  The user
3619         # cannot change bond strengths but can cause the left and right
3620         # bond strengths of any token type to be swapped through the use of
3621         # the -wba and -wbb flags. In this way the user can determine if a
3622         # breakpoint token should appear at the end of one line or the
3623         # beginning of the next line.
3624
3625         %right_bond_strength          = ();
3626         %left_bond_strength           = ();
3627         %binary_bond_strength_nospace = ();
3628         %binary_bond_strength         = ();
3629         %nobreak_lhs                  = ();
3630         %nobreak_rhs                  = ();
3631
3632         # The hash keys in this section are token types, plus the text of
3633         # certain keywords like 'or', 'and'.
3634
3635         # no break around possible filehandle
3636         $left_bond_strength{'Z'}  = NO_BREAK;
3637         $right_bond_strength{'Z'} = NO_BREAK;
3638
3639         # never put a bare word on a new line:
3640         # example print (STDERR, "bla"); will fail with break after (
3641         $left_bond_strength{'w'} = NO_BREAK;
3642
3643         # blanks always have infinite strength to force breaks after
3644         # real tokens
3645         $right_bond_strength{'b'} = NO_BREAK;
3646
3647         # try not to break on exponentiation
3648         @q                       = qw# ** .. ... <=> #;
3649         @left_bond_strength{@q}  = (STRONG) x scalar(@q);
3650         @right_bond_strength{@q} = (STRONG) x scalar(@q);
3651
3652         # The comma-arrow has very low precedence but not a good break point
3653         $left_bond_strength{'=>'}  = NO_BREAK;
3654         $right_bond_strength{'=>'} = NOMINAL;
3655
3656         # ok to break after label
3657         $left_bond_strength{'J'}  = NO_BREAK;
3658         $right_bond_strength{'J'} = NOMINAL;
3659         $left_bond_strength{'j'}  = STRONG;
3660         $right_bond_strength{'j'} = STRONG;
3661         $left_bond_strength{'A'}  = STRONG;
3662         $right_bond_strength{'A'} = STRONG;
3663
3664         $left_bond_strength{'->'}  = STRONG;
3665         $right_bond_strength{'->'} = VERY_STRONG;
3666
3667         $left_bond_strength{'CORE::'}  = NOMINAL;
3668         $right_bond_strength{'CORE::'} = NO_BREAK;
3669
3670         # breaking AFTER modulus operator is ok:
3671         @q = qw< % >;
3672         @left_bond_strength{@q} = (STRONG) x scalar(@q);
3673         @right_bond_strength{@q} =
3674           ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
3675
3676         # Break AFTER math operators * and /
3677         @q                       = qw< * / x  >;
3678         @left_bond_strength{@q}  = (STRONG) x scalar(@q);
3679         @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
3680
3681         # Break AFTER weakest math operators + and -
3682         # Make them weaker than * but a bit stronger than '.'
3683         @q = qw< + - >;
3684         @left_bond_strength{@q} = (STRONG) x scalar(@q);
3685         @right_bond_strength{@q} =
3686           ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
3687
3688         # Define left strength of unary plus and minus (fixes case b511)
3689         $left_bond_strength{p} = $left_bond_strength{'+'};
3690         $left_bond_strength{m} = $left_bond_strength{'-'};
3691
3692         # And make right strength of unary plus and minus very high.
3693         # Fixes cases b670 b790
3694         $right_bond_strength{p} = NO_BREAK;
3695         $right_bond_strength{m} = NO_BREAK;
3696
3697         # breaking BEFORE these is just ok:
3698         @q                       = qw# >> << #;
3699         @right_bond_strength{@q} = (STRONG) x scalar(@q);
3700         @left_bond_strength{@q}  = (NOMINAL) x scalar(@q);
3701
3702         # breaking before the string concatenation operator seems best
3703         # because it can be hard to see at the end of a line
3704         $right_bond_strength{'.'} = STRONG;
3705         $left_bond_strength{'.'}  = 0.9 * NOMINAL + 0.1 * WEAK;
3706
3707         @q                       = qw< } ] ) R >;
3708         @left_bond_strength{@q}  = (STRONG) x scalar(@q);
3709         @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
3710
3711         # make these a little weaker than nominal so that they get
3712         # favored for end-of-line characters
3713         @q = qw< != == =~ !~ ~~ !~~ >;
3714         @left_bond_strength{@q} = (STRONG) x scalar(@q);
3715         @right_bond_strength{@q} =
3716           ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
3717
3718         # break AFTER these
3719         @q = qw# < >  | & >= <= #;
3720         @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
3721         @right_bond_strength{@q} =
3722           ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
3723
3724         # breaking either before or after a quote is ok
3725         # but bias for breaking before a quote
3726         $left_bond_strength{'Q'}  = NOMINAL;
3727         $right_bond_strength{'Q'} = NOMINAL + 0.02;
3728         $left_bond_strength{'q'}  = NOMINAL;
3729         $right_bond_strength{'q'} = NOMINAL;
3730
3731         # starting a line with a keyword is usually ok
3732         $left_bond_strength{'k'} = NOMINAL;
3733
3734         # we usually want to bond a keyword strongly to what immediately
3735         # follows, rather than leaving it stranded at the end of a line
3736         $right_bond_strength{'k'} = STRONG;
3737
3738         $left_bond_strength{'G'}  = NOMINAL;
3739         $right_bond_strength{'G'} = STRONG;
3740
3741         # assignment operators
3742         @q = qw(
3743           = **= += *= &= <<= &&=
3744           -= /= |= >>= ||= //=
3745           .= %= ^=
3746           x=
3747         );
3748
3749         # Default is to break AFTER various assignment operators
3750         @left_bond_strength{@q} = (STRONG) x scalar(@q);
3751         @right_bond_strength{@q} =
3752           ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
3753
3754         # Default is to break BEFORE '&&' and '||' and '//'
3755         # set strength of '||' to same as '=' so that chains like
3756         # $a = $b || $c || $d   will break before the first '||'
3757         $right_bond_strength{'||'} = NOMINAL;
3758         $left_bond_strength{'||'}  = $right_bond_strength{'='};
3759
3760         # same thing for '//'
3761         $right_bond_strength{'//'} = NOMINAL;
3762         $left_bond_strength{'//'}  = $right_bond_strength{'='};
3763
3764         # set strength of && a little higher than ||
3765         $right_bond_strength{'&&'} = NOMINAL;
3766         $left_bond_strength{'&&'}  = $left_bond_strength{'||'} + 0.1;
3767
3768         $left_bond_strength{';'}  = VERY_STRONG;
3769         $right_bond_strength{';'} = VERY_WEAK;
3770         $left_bond_strength{'f'}  = VERY_STRONG;
3771
3772         # make right strength of for ';' a little less than '='
3773         # to make for contents break after the ';' to avoid this:
3774         #   for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
3775         #     $number_of_fields )
3776         # and make it weaker than ',' and 'and' too
3777         $right_bond_strength{'f'} = VERY_WEAK - 0.03;
3778
3779         # The strengths of ?/: should be somewhere between
3780         # an '=' and a quote (NOMINAL),
3781         # make strength of ':' slightly less than '?' to help
3782         # break long chains of ? : after the colons
3783         $left_bond_strength{':'}  = 0.4 * WEAK + 0.6 * NOMINAL;
3784         $right_bond_strength{':'} = NO_BREAK;
3785         $left_bond_strength{'?'}  = $left_bond_strength{':'} + 0.01;
3786         $right_bond_strength{'?'} = NO_BREAK;
3787
3788         $left_bond_strength{','}  = VERY_STRONG;
3789         $right_bond_strength{','} = VERY_WEAK;
3790
3791         # remaining digraphs and trigraphs not defined above
3792         @q                       = qw( :: <> ++ --);
3793         @left_bond_strength{@q}  = (WEAK) x scalar(@q);
3794         @right_bond_strength{@q} = (STRONG) x scalar(@q);
3795
3796         # Set bond strengths of certain keywords
3797         # make 'or', 'err', 'and' slightly weaker than a ','
3798         $left_bond_strength{'and'}  = VERY_WEAK - 0.01;
3799         $left_bond_strength{'or'}   = VERY_WEAK - 0.02;
3800         $left_bond_strength{'err'}  = VERY_WEAK - 0.02;
3801         $left_bond_strength{'xor'}  = VERY_WEAK - 0.01;
3802         $right_bond_strength{'and'} = NOMINAL;
3803         $right_bond_strength{'or'}  = NOMINAL;
3804         $right_bond_strength{'err'} = NOMINAL;
3805         $right_bond_strength{'xor'} = NOMINAL;
3806
3807         #---------------------------------------------------------------
3808         # Bond Strength BEGIN Section 2.
3809         # Set binary rules for bond strengths between certain token types.
3810         #---------------------------------------------------------------
3811
3812         #  We have a little problem making tables which apply to the
3813         #  container tokens.  Here is a list of container tokens and
3814         #  their types:
3815         #
3816         #   type    tokens // meaning
3817         #      {    {, [, ( // indent
3818         #      }    }, ], ) // outdent
3819         #      [    [ // left non-structural [ (enclosing an array index)
3820         #      ]    ] // right non-structural square bracket
3821         #      (    ( // left non-structural paren
3822         #      )    ) // right non-structural paren
3823         #      L    { // left non-structural curly brace (enclosing a key)
3824         #      R    } // right non-structural curly brace
3825         #
3826         #  Some rules apply to token types and some to just the token
3827         #  itself.  We solve the problem by combining type and token into a
3828         #  new hash key for the container types.
3829         #
3830         #  If a rule applies to a token 'type' then we need to make rules
3831         #  for each of these 'type.token' combinations:
3832         #  Type    Type.Token
3833         #  {       {{, {[, {(
3834         #  [       [[
3835         #  (       ((
3836         #  L       L{
3837         #  }       }}, }], })
3838         #  ]       ]]
3839         #  )       ))
3840         #  R       R}
3841         #
3842         #  If a rule applies to a token then we need to make rules for
3843         #  these 'type.token' combinations:
3844         #  Token   Type.Token
3845         #  {       {{, L{
3846         #  [       {[, [[
3847         #  (       {(, ((
3848         #  }       }}, R}
3849         #  ]       }], ]]
3850         #  )       }), ))
3851
3852         # allow long lines before final { in an if statement, as in:
3853         #    if (..........
3854         #      ..........)
3855         #    {
3856         #
3857         # Otherwise, the line before the { tends to be too short.
3858
3859         $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
3860         $binary_bond_strength{'(('}{'{{'} = NOMINAL;
3861
3862         # break on something like '} (', but keep this stronger than a ','
3863         # example is in 'howe.pl'
3864         $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
3865         $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
3866
3867         # keep matrix and hash indices together
3868         # but make them a little below STRONG to allow breaking open
3869         # something like {'some-word'}{'some-very-long-word'} at the }{
3870         # (bracebrk.t)
3871         $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
3872         $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
3873         $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
3874         $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
3875
3876         # increase strength to the point where a break in the following
3877         # will be after the opening paren rather than at the arrow:
3878         #    $a->$b($c);
3879         $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
3880
3881     # Note that the following alternative strength would make the break at the
3882     # '->' rather than opening the '('.  Both have advantages and disadvantages.
3883     # $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; #
3884
3885         $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3886         $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3887         $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3888         $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3889         $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3890         $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
3891
3892         $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
3893         $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
3894         $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
3895         $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
3896
3897         #---------------------------------------------------------------
3898         # Binary NO_BREAK rules
3899         #---------------------------------------------------------------
3900
3901         # use strict requires that bare word and => not be separated
3902         $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
3903         $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
3904
3905         # Never break between a bareword and a following paren because
3906         # perl may give an error.  For example, if a break is placed
3907         # between 'to_filehandle' and its '(' the following line will
3908         # give a syntax error [Carp.pm]: my( $no) =fileno(
3909         # to_filehandle( $in)) ;
3910         $binary_bond_strength{'C'}{'(('} = NO_BREAK;
3911         $binary_bond_strength{'C'}{'{('} = NO_BREAK;
3912         $binary_bond_strength{'U'}{'(('} = NO_BREAK;
3913         $binary_bond_strength{'U'}{'{('} = NO_BREAK;
3914
3915         # use strict requires that bare word within braces not start new
3916         # line
3917         $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
3918
3919         $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
3920
3921         # The following two rules prevent a syntax error caused by breaking up
3922         # a construction like '{-y}'.  The '-' quotes the 'y' and prevents
3923         # it from being taken as a transliteration. We have to keep
3924         # token types 'L m w' together to prevent this error.
3925         $binary_bond_strength{'L{'}{'m'}        = NO_BREAK;
3926         $binary_bond_strength_nospace{'m'}{'w'} = NO_BREAK;
3927
3928         # keep 'bareword-' together, but only if there is no space between
3929         # the word and dash. Do not keep together if there is a space.
3930         # example 'use perl6-alpha'
3931         $binary_bond_strength_nospace{'w'}{'m'} = NO_BREAK;
3932
3933         # use strict requires that bare word and => not be separated
3934         $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
3935
3936         # use strict does not allow separating type info from trailing { }
3937         # testfile is readmail.pl
3938         $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
3939         $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
3940
3941         # As a defensive measure, do not break between a '(' and a
3942         # filehandle.  In some cases, this can cause an error.  For
3943         # example, the following program works:
3944         #    my $msg="hi!\n";
3945         #    print
3946         #    ( STDOUT
3947         #    $msg
3948         #    );
3949         #
3950         # But this program fails:
3951         #    my $msg="hi!\n";
3952         #    print
3953         #    (
3954         #    STDOUT
3955         #    $msg
3956         #    );
3957         #
3958         # This is normally only a problem with the 'extrude' option
3959         $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
3960         $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
3961
3962         # never break between sub name and opening paren
3963         $binary_bond_strength{'w'}{'(('} = NO_BREAK;
3964         $binary_bond_strength{'w'}{'{('} = NO_BREAK;
3965
3966         # keep '}' together with ';'
3967         $binary_bond_strength{'}}'}{';'} = NO_BREAK;
3968
3969         # Breaking before a ++ can cause perl to guess wrong. For
3970         # example the following line will cause a syntax error
3971         # with -extrude if we break between '$i' and '++' [fixstyle2]
3972         #   print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
3973         $nobreak_lhs{'++'} = NO_BREAK;
3974
3975         # Do not break before a possible file handle
3976         $nobreak_lhs{'Z'} = NO_BREAK;
3977
3978         # use strict hates bare words on any new line.  For
3979         # example, a break before the underscore here provokes the
3980         # wrath of use strict:
3981         # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
3982         $nobreak_rhs{'F'}      = NO_BREAK;
3983         $nobreak_rhs{'CORE::'} = NO_BREAK;
3984
3985         # To prevent the tokenizer from switching between types 'w' and 'G' we
3986         # need to avoid breaking between type 'G' and the following code block
3987         # brace. Fixes case b929.
3988         $nobreak_rhs{G} = NO_BREAK;
3989
3990         #---------------------------------------------------------------
3991         # Bond Strength BEGIN Section 3.
3992         # Define tables and values for applying a small bias to the above
3993         # values.
3994         #---------------------------------------------------------------
3995         # Adding a small 'bias' to strengths is a simple way to make a line
3996         # break at the first of a sequence of identical terms.  For
3997         # example, to force long string of conditional operators to break
3998         # with each line ending in a ':', we can add a small number to the
3999         # bond strength of each ':' (colon.t)
4000         @bias_tokens = qw( : && || f and or . );       # tokens which get bias
4001         %bias_hash   = map { $_ => 0 } @bias_tokens;
4002         $delta_bias  = 0.0001;    # a very small strength level
4003         return;
4004
4005     } ## end sub initialize_bond_strength_hashes
4006
4007     use constant DEBUG_BOND => 0;
4008
4009     sub set_bond_strengths {
4010
4011         my ($self) = @_;
4012
4013         my $rbond_strength_to_go = [];
4014
4015         my $rLL               = $self->[_rLL_];
4016         my $rK_weld_right     = $self->[_rK_weld_right_];
4017         my $rK_weld_left      = $self->[_rK_weld_left_];
4018         my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
4019
4020         # patch-its always ok to break at end of line
4021         $nobreak_to_go[$max_index_to_go] = 0;
4022
4023         # we start a new set of bias values for each line
4024         %bias = %bias_hash;
4025
4026         my $code_bias = -.01;    # bias for closing block braces
4027
4028         my $type         = 'b';
4029         my $token        = SPACE;
4030         my $token_length = 1;
4031         my $last_type;
4032         my $last_nonblank_type  = $type;
4033         my $last_nonblank_token = $token;
4034         my $list_str            = $left_bond_strength{'?'};
4035
4036         my ( $bond_str_1, $bond_str_2, $bond_str_3, $bond_str_4 );
4037
4038         my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
4039             $next_nonblank_type, $next_token, $next_type,
4040             $total_nesting_depth, );
4041
4042         # main loop to compute bond strengths between each pair of tokens
4043         foreach my $i ( 0 .. $max_index_to_go ) {
4044             $last_type = $type;
4045             if ( $type ne 'b' ) {
4046                 $last_nonblank_type  = $type;
4047                 $last_nonblank_token = $token;
4048             }
4049             $type = $types_to_go[$i];
4050
4051             # strength on both sides of a blank is the same
4052             if ( $type eq 'b' && $last_type ne 'b' ) {
4053                 $rbond_strength_to_go->[$i] = $rbond_strength_to_go->[ $i - 1 ];
4054                 $nobreak_to_go[$i] ||= $nobreak_to_go[ $i - 1 ]; # fix for b1257
4055                 next;
4056             }
4057
4058             $token               = $tokens_to_go[$i];
4059             $token_length        = $token_lengths_to_go[$i];
4060             $block_type          = $block_type_to_go[$i];
4061             $i_next              = $i + 1;
4062             $next_type           = $types_to_go[$i_next];
4063             $next_token          = $tokens_to_go[$i_next];
4064             $total_nesting_depth = $nesting_depth_to_go[$i_next];
4065             $i_next_nonblank     = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
4066             $next_nonblank_type  = $types_to_go[$i_next_nonblank];
4067             $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
4068
4069             my $seqno               = $type_sequence_to_go[$i];
4070             my $next_nonblank_seqno = $type_sequence_to_go[$i_next_nonblank];
4071
4072             # We are computing the strength of the bond between the current
4073             # token and the NEXT token.
4074
4075             #---------------------------------------------------------------
4076             # Bond Strength Section 1:
4077             # First Approximation.
4078             # Use minimum of individual left and right tabulated bond
4079             # strengths.
4080             #---------------------------------------------------------------
4081             my $bsr = $right_bond_strength{$type};
4082             my $bsl = $left_bond_strength{$next_nonblank_type};
4083
4084             # define right bond strengths of certain keywords
4085             if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
4086                 $bsr = $right_bond_strength{$token};
4087             }
4088             elsif ( $token eq 'ne' or $token eq 'eq' ) {
4089                 $bsr = NOMINAL;
4090             }
4091
4092             # set terminal bond strength to the nominal value
4093             # this will cause good preceding breaks to be retained
4094             if ( $i_next_nonblank > $max_index_to_go ) {
4095                 $bsl = NOMINAL;
4096
4097                 # But weaken the bond at a 'missing terminal comma'.  If an
4098                 # optional comma is missing at the end of a broken list, use
4099                 # the strength of a comma anyway to make formatting the same as
4100                 # if it were there. Fixes issue c133.
4101                 if ( !defined($bsr) || $bsr > VERY_WEAK ) {
4102                     my $seqno_px = $parent_seqno_to_go[$max_index_to_go];
4103                     if ( $ris_list_by_seqno->{$seqno_px} ) {
4104                         my $KK      = $K_to_go[$max_index_to_go];
4105                         my $Kn      = $self->K_next_nonblank($KK);
4106                         my $seqno_n = $rLL->[$Kn]->[_TYPE_SEQUENCE_];
4107                         if ( $seqno_n && $seqno_n eq $seqno_px ) {
4108                             $bsl = VERY_WEAK;
4109                         }
4110                     }
4111                 }
4112             }
4113
4114             # define right bond strengths of certain keywords
4115             if ( $next_nonblank_type eq 'k'
4116                 && defined( $left_bond_strength{$next_nonblank_token} ) )
4117             {
4118                 $bsl = $left_bond_strength{$next_nonblank_token};
4119             }
4120             elsif ($next_nonblank_token eq 'ne'
4121                 or $next_nonblank_token eq 'eq' )
4122             {
4123                 $bsl = NOMINAL;
4124             }
4125             elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
4126                 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
4127             }
4128
4129             # Use the minimum of the left and right strengths.  Note: it might
4130             # seem that we would want to keep a NO_BREAK if either token has
4131             # this value.  This didn't work, for example because in an arrow
4132             # list, it prevents the comma from separating from the following
4133             # bare word (which is probably quoted by its arrow).  So necessary
4134             # NO_BREAK's have to be handled as special cases in the final
4135             # section.
4136             if ( !defined($bsr) ) { $bsr = VERY_STRONG }
4137             if ( !defined($bsl) ) { $bsl = VERY_STRONG }
4138             my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
4139             $bond_str_1 = $bond_str if (DEBUG_BOND);
4140
4141             #---------------------------------------------------------------
4142             # Bond Strength Section 2:
4143             # Apply hardwired rules..
4144             #---------------------------------------------------------------
4145
4146             # Patch to put terminal or clauses on a new line: Weaken the bond
4147             # at an || followed by die or similar keyword to make the terminal
4148             # or clause fall on a new line, like this:
4149             #
4150             #   my $class = shift
4151             #     || die "Cannot add broadcast:  No class identifier found";
4152             #
4153             # Otherwise the break will be at the previous '=' since the || and
4154             # = have the same starting strength and the or is biased, like
4155             # this:
4156             #
4157             # my $class =
4158             #   shift || die "Cannot add broadcast:  No class identifier found";
4159             #
4160             # In any case if the user places a break at either the = or the ||
4161             # it should remain there.
4162             if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
4163
4164                 #    /^(die|confess|croak|warn)$/
4165                 if ( $is_die_confess_croak_warn{$next_nonblank_token} ) {
4166                     if ( $want_break_before{$token} && $i > 0 ) {
4167                         $rbond_strength_to_go->[ $i - 1 ] -= $delta_bias;
4168
4169                         # keep bond strength of a token and its following blank
4170                         # the same
4171                         if ( $types_to_go[ $i - 1 ] eq 'b' && $i > 2 ) {
4172                             $rbond_strength_to_go->[ $i - 2 ] -= $delta_bias;
4173                         }
4174                     }
4175                     else {
4176                         $bond_str -= $delta_bias;
4177                     }
4178                 }
4179             }
4180
4181             # good to break after end of code blocks
4182             if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
4183
4184                 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
4185                 $code_bias += $delta_bias;
4186             }
4187
4188             if ( $type eq 'k' ) {
4189
4190                 # allow certain control keywords to stand out
4191                 if (   $next_nonblank_type eq 'k'
4192                     && $is_last_next_redo_return{$token} )
4193                 {
4194                     $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
4195                 }
4196
4197                 # Don't break after keyword my.  This is a quick fix for a
4198                 # rare problem with perl. An example is this line from file
4199                 # Container.pm:
4200
4201                 # foreach my $question( Debian::DebConf::ConfigDb::gettree(
4202                 # $this->{'question'} ) )
4203
4204                 if ( $token eq 'my' ) {
4205                     $bond_str = NO_BREAK;
4206                 }
4207
4208             }
4209
4210             if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
4211
4212                 if ( $is_keyword_returning_list{$next_nonblank_token} ) {
4213                     $bond_str = $list_str if ( $bond_str > $list_str );
4214                 }
4215
4216                 # keywords like 'unless', 'if', etc, within statements
4217                 # make good breaks
4218                 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
4219                     $bond_str = VERY_WEAK / 1.05;
4220                 }
4221             }
4222
4223             # try not to break before a comma-arrow
4224             elsif ( $next_nonblank_type eq '=>' ) {
4225                 if ( $bond_str < STRONG ) { $bond_str = STRONG }
4226             }
4227
4228             #---------------------------------------------------------------
4229             # Additional hardwired NOBREAK rules
4230             #---------------------------------------------------------------
4231
4232             # map1.t -- correct for a quirk in perl
4233             if (   $token eq '('
4234                 && $next_nonblank_type eq 'i'
4235                 && $last_nonblank_type eq 'k'
4236                 && $is_sort_map_grep{$last_nonblank_token} )
4237
4238               #     /^(sort|map|grep)$/ )
4239             {
4240                 $bond_str = NO_BREAK;
4241             }
4242
4243             # extrude.t: do not break before paren at:
4244             #    -l pid_filename(
4245             if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
4246                 $bond_str = NO_BREAK;
4247             }
4248
4249             # OLD COMMENT: In older version of perl, use strict can cause
4250             # problems with breaks before bare words following opening parens.
4251             # For example, this will fail under older versions if a break is
4252             # made between '(' and 'MAIL':
4253
4254             # use strict; open( MAIL, "a long filename or command"); close MAIL;
4255
4256             # NEW COMMENT: Third fix for b1213:
4257             # This option does not seem to be needed any longer, and it can
4258             # cause instabilities.  It can be turned off, but to minimize
4259             # changes to existing formatting it is retained only in the case
4260             # where the previous token was 'open' and there was no line break.
4261             # Even this could eventually be removed if it causes instability.
4262             if ( $type eq '{' ) {
4263
4264                 if (   $token eq '('
4265                     && $next_nonblank_type eq 'w'
4266                     && $last_nonblank_type eq 'k'
4267                     && $last_nonblank_token eq 'open'
4268                     && !$old_breakpoint_to_go[$i] )
4269                 {
4270                     $bond_str = NO_BREAK;
4271                 }
4272             }
4273
4274             # Do not break between a possible filehandle and a ? or / and do
4275             # not introduce a break after it if there is no blank
4276             # (extrude.t)
4277             elsif ( $type eq 'Z' ) {
4278
4279                 # don't break..
4280                 if (
4281
4282                     # if there is no blank and we do not want one. Examples:
4283                     #    print $x++    # do not break after $x
4284                     #    print HTML"HELLO"   # break ok after HTML
4285                     (
4286                            $next_type ne 'b'
4287                         && defined( $want_left_space{$next_type} )
4288                         && $want_left_space{$next_type} == WS_NO
4289                     )
4290
4291                     # or we might be followed by the start of a quote,
4292                     # and this is not an existing breakpoint; fixes c039.
4293                     || !$old_breakpoint_to_go[$i]
4294                     && substr( $next_nonblank_token, 0, 1 ) eq '/'
4295
4296                   )
4297                 {
4298                     $bond_str = NO_BREAK;
4299                 }
4300             }
4301
4302             # Breaking before a ? before a quote can cause trouble if
4303             # they are not separated by a blank.
4304             # Example: a syntax error occurs if you break before the ? here
4305             #  my$logic=join$all?' && ':' || ',@regexps;
4306             # From: Professional_Perl_Programming_Code/multifind.pl
4307             if ( $next_nonblank_type eq '?' ) {
4308                 $bond_str = NO_BREAK
4309                   if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
4310             }
4311
4312             # Breaking before a . followed by a number
4313             # can cause trouble if there is no intervening space
4314             # Example: a syntax error occurs if you break before the .2 here
4315             #  $str .= pack($endian.2, ensurrogate($ord));
4316             # From: perl58/Unicode.pm
4317             elsif ( $next_nonblank_type eq '.' ) {
4318                 $bond_str = NO_BREAK
4319                   if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
4320             }
4321
4322             # Fix for c039
4323             elsif ( $type eq 'w' ) {
4324                 $bond_str = NO_BREAK
4325                   if ( !$old_breakpoint_to_go[$i]
4326                     && substr( $next_nonblank_token, 0, 1 ) eq '/' );
4327             }
4328
4329             $bond_str_2 = $bond_str if (DEBUG_BOND);
4330
4331             #---------------------------------------------------------------
4332             # End of hardwired rules
4333             #---------------------------------------------------------------
4334
4335             #---------------------------------------------------------------
4336             # Bond Strength Section 3:
4337             # Apply table rules. These have priority over the above
4338             # hardwired rules.
4339             #---------------------------------------------------------------
4340
4341             my $tabulated_bond_str;
4342             my $ltype = $type;
4343             my $rtype = $next_nonblank_type;
4344             if ( $seqno && $is_container_token{$token} ) {
4345                 $ltype = $type . $token;
4346             }
4347
4348             if (   $next_nonblank_seqno
4349                 && $is_container_token{$next_nonblank_token} )
4350             {
4351                 $rtype = $next_nonblank_type . $next_nonblank_token;
4352
4353                 # Alternate Fix #1 for issue b1299.  This version makes the
4354                 # decision as soon as possible.  See Alternate Fix #2 also.
4355                 # Do not separate a bareword identifier from its paren: b1299
4356                 # This is currently needed for stability because if the bareword
4357                 # gets separated from a preceding '->' and following '(' then
4358                 # the tokenizer may switch from type 'i' to type 'w'.  This
4359                 # patch will prevent this by keeping it adjacent to its '('.
4360 ##              if (   $next_nonblank_token eq '('
4361 ##                  && $ltype eq 'i'
4362 ##                  && substr( $token, 0, 1 ) =~ /^\w$/ )
4363 ##              {
4364 ##                  $ltype = 'w';
4365 ##              }
4366             }
4367
4368             # apply binary rules which apply regardless of space between tokens
4369             if ( $binary_bond_strength{$ltype}{$rtype} ) {
4370                 $bond_str           = $binary_bond_strength{$ltype}{$rtype};
4371                 $tabulated_bond_str = $bond_str;
4372             }
4373
4374             # apply binary rules which apply only if no space between tokens
4375             if ( $binary_bond_strength_nospace{$ltype}{$next_type} ) {
4376                 $bond_str           = $binary_bond_strength{$ltype}{$next_type};
4377                 $tabulated_bond_str = $bond_str;
4378             }
4379
4380             if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
4381                 $bond_str           = NO_BREAK;
4382                 $tabulated_bond_str = $bond_str;
4383             }
4384
4385             $bond_str_3 = $bond_str if (DEBUG_BOND);
4386
4387             # If the hardwired rules conflict with the tabulated bond
4388             # strength then there is an inconsistency that should be fixed
4389             DEBUG_BOND
4390               && $tabulated_bond_str
4391               && $bond_str_1
4392               && $bond_str_1 != $bond_str_2
4393               && $bond_str_2 != $tabulated_bond_str
4394               && do {
4395                 print STDERR
4396 "BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
4397               };
4398
4399            #-----------------------------------------------------------------
4400            # Bond Strength Section 4:
4401            # Modify strengths of certain tokens which often occur in sequence
4402            # by adding a small bias to each one in turn so that the breaks
4403            # occur from left to right.
4404            #
4405            # Note that we only changing strengths by small amounts here,
4406            # and usually increasing, so we should not be altering any NO_BREAKs.
4407            # Other routines which check for NO_BREAKs will use a tolerance
4408            # of one to avoid any problem.
4409            #-----------------------------------------------------------------
4410
4411             # The bias tables use special keys:
4412             #   $type - if not keyword
4413             #   $token - if keyword, but map some keywords together
4414             my $left_key =
4415               $type eq 'k' ? $token eq 'err' ? 'or' : $token : $type;
4416             my $right_key =
4417                 $next_nonblank_type eq 'k'
4418               ? $next_nonblank_token eq 'err'
4419                   ? 'or'
4420                   : $next_nonblank_token
4421               : $next_nonblank_type;
4422
4423             # bias left token
4424             if ( defined( $bias{$left_key} ) ) {
4425                 if ( !$want_break_before{$left_key} ) {
4426                     $bias{$left_key} += $delta_bias;
4427                     $bond_str += $bias{$left_key};
4428                 }
4429             }
4430
4431             # bias right token
4432             if ( defined( $bias{$right_key} ) ) {
4433                 if ( $want_break_before{$right_key} ) {
4434
4435                     # for leading '.' align all but 'short' quotes; the idea
4436                     # is to not place something like "\n" on a single line.
4437                     if ( $right_key eq '.' ) {
4438                         unless (
4439                             $last_nonblank_type eq '.'
4440                             && ( $token_length <=
4441                                 $rOpts_short_concatenation_item_length )
4442                             && ( !$is_closing_token{$token} )
4443                           )
4444                         {
4445                             $bias{$right_key} += $delta_bias;
4446                         }
4447                     }
4448                     else {
4449                         $bias{$right_key} += $delta_bias;
4450                     }
4451                     $bond_str += $bias{$right_key};
4452                 }
4453             }
4454
4455             $bond_str_4 = $bond_str if (DEBUG_BOND);
4456
4457             #---------------------------------------------------------------
4458             # Bond Strength Section 5:
4459             # Fifth Approximation.
4460             # Take nesting depth into account by adding the nesting depth
4461             # to the bond strength.
4462             #---------------------------------------------------------------
4463             my $strength;
4464
4465             if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
4466                 if ( $total_nesting_depth > 0 ) {
4467                     $strength = $bond_str + $total_nesting_depth;
4468                 }
4469                 else {
4470                     $strength = $bond_str;
4471                 }
4472             }
4473             else {
4474                 $strength = NO_BREAK;
4475
4476                 # For critical code such as lines with here targets we must
4477                 # be absolutely sure that we do not allow a break.  So for
4478                 # these the nobreak flag exceeds 1 as a signal. Otherwise we
4479                 # can run into trouble when small tolerances are added.
4480                 $strength += 1 if ( $nobreak_to_go[$i] > 1 );
4481             }
4482
4483             #---------------------------------------------------------------
4484             # Bond Strength Section 6:
4485             # Sixth Approximation. Welds.
4486             #---------------------------------------------------------------
4487
4488             # Do not allow a break within welds
4489             if ( $total_weld_count && $seqno ) {
4490                 my $KK = $K_to_go[$i];
4491                 if ( $rK_weld_right->{$KK} ) {
4492                     $strength = NO_BREAK;
4493                 }
4494
4495                 # But encourage breaking after opening welded tokens
4496                 elsif ($rK_weld_left->{$KK}
4497                     && $is_opening_token{$token} )
4498                 {
4499                     $strength -= 1;
4500                 }
4501             }
4502
4503             # always break after side comment
4504             if ( $type eq '#' ) { $strength = 0 }
4505
4506             $rbond_strength_to_go->[$i] = $strength;
4507
4508             # Fix for case c001: be sure NO_BREAK's are enforced by later
4509             # routines, except at a '?' because '?' as quote delimiter is
4510             # deprecated.
4511             if ( $strength >= NO_BREAK && $next_nonblank_type ne '?' ) {
4512                 $nobreak_to_go[$i] ||= 1;
4513             }
4514
4515             DEBUG_BOND && do {
4516                 my $str = substr( $token, 0, 15 );
4517                 $str .= SPACE x ( 16 - length($str) );
4518                 print STDOUT
4519 "BOND:  i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
4520
4521                 # reset for next pass
4522                 $bond_str_1 = $bond_str_2 = $bond_str_3 = $bond_str_4 = undef;
4523             };
4524
4525         } ## end main loop
4526         return $rbond_strength_to_go;
4527     } ## end sub set_bond_strengths
4528 } ## end closure set_bond_strengths
4529
4530 sub bad_pattern {
4531
4532     # See if a pattern will compile. We have to use a string eval here,
4533     # but it should be safe because the pattern has been constructed
4534     # by this program.
4535     my ($pattern) = @_;
4536     eval "'##'=~/$pattern/";
4537     return $EVAL_ERROR;
4538 }
4539
4540 {    ## begin closure prepare_cuddled_block_types
4541
4542     my %no_cuddle;
4543
4544     # Add keywords here which really should not be cuddled
4545     BEGIN {
4546         my @q = qw(if unless for foreach while);
4547         @no_cuddle{@q} = (1) x scalar(@q);
4548     }
4549
4550     sub prepare_cuddled_block_types {
4551
4552         # the cuddled-else style, if used, is controlled by a hash that
4553         # we construct here
4554
4555         # Include keywords here which should not be cuddled
4556
4557         my $cuddled_string = EMPTY_STRING;
4558         if ( $rOpts->{'cuddled-else'} ) {
4559
4560             # set the default
4561             $cuddled_string = 'elsif else continue catch finally'
4562               unless ( $rOpts->{'cuddled-block-list-exclusive'} );
4563
4564             # This is the old equivalent but more complex version
4565             # $cuddled_string = 'if-elsif-else unless-elsif-else -continue ';
4566
4567             # Add users other blocks to be cuddled
4568             my $cuddled_block_list = $rOpts->{'cuddled-block-list'};
4569             if ($cuddled_block_list) {
4570                 $cuddled_string .= SPACE . $cuddled_block_list;
4571             }
4572
4573         }
4574
4575         # If we have a cuddled string of the form
4576         #  'try-catch-finally'
4577
4578         # we want to prepare a hash of the form
4579
4580         # $rcuddled_block_types = {
4581         #    'try' => {
4582         #        'catch'   => 1,
4583         #        'finally' => 1
4584         #    },
4585         # };
4586
4587         # use -dcbl to dump this hash
4588
4589         # Multiple such strings are input as a space or comma separated list
4590
4591         # If we get two lists with the same leading type, such as
4592         #   -cbl = "-try-catch-finally  -try-catch-otherwise"
4593         # then they will get merged as follows:
4594         # $rcuddled_block_types = {
4595         #    'try' => {
4596         #        'catch'     => 1,
4597         #        'finally'   => 2,
4598         #        'otherwise' => 1,
4599         #    },
4600         # };
4601         # This will allow either type of chain to be followed.
4602
4603         $cuddled_string =~ s/,/ /g;    # allow space or comma separated lists
4604         my @cuddled_strings = split /\s+/, $cuddled_string;
4605
4606         $rcuddled_block_types = {};
4607
4608         # process each dash-separated string...
4609         my $string_count = 0;
4610         foreach my $string (@cuddled_strings) {
4611             next unless $string;
4612             my @words = split /-+/, $string;    # allow multiple dashes
4613
4614             # we could look for and report possible errors here...
4615             next unless ( @words > 0 );
4616
4617            # allow either '-continue' or *-continue' for arbitrary starting type
4618             my $start = '*';
4619
4620             # a single word without dashes is a secondary block type
4621             if ( @words > 1 ) {
4622                 $start = shift @words;
4623             }
4624
4625             # always make an entry for the leading word. If none follow, this
4626             # will still prevent a wildcard from matching this word.
4627             if ( !defined( $rcuddled_block_types->{$start} ) ) {
4628                 $rcuddled_block_types->{$start} = {};
4629             }
4630
4631             # The count gives the original word order in case we ever want it.
4632             $string_count++;
4633             my $word_count = 0;
4634             foreach my $word (@words) {
4635                 next unless $word;
4636                 if ( $no_cuddle{$word} ) {
4637                     Warn(
4638 "## Ignoring keyword '$word' in -cbl; does not seem right\n"
4639                     );
4640                     next;
4641                 }
4642                 $word_count++;
4643                 $rcuddled_block_types->{$start}->{$word} =
4644                   1;    #"$string_count.$word_count";
4645
4646                 # git#9: Remove this word from the list of desired one-line
4647                 # blocks
4648                 $want_one_line_block{$word} = 0;
4649             }
4650         }
4651         return;
4652     } ## end sub prepare_cuddled_block_types
4653 } ## end closure prepare_cuddled_block_types
4654
4655 sub dump_cuddled_block_list {
4656     my ($fh) = @_;
4657
4658     # ORIGINAL METHOD: Here is the format of the cuddled block type hash
4659     # which controls this routine
4660     #    my $rcuddled_block_types = {
4661     #        'if' => {
4662     #            'else'  => 1,
4663     #            'elsif' => 1
4664     #        },
4665     #        'try' => {
4666     #            'catch'   => 1,
4667     #            'finally' => 1
4668     #        },
4669     #    };
4670
4671     # SIMPLIFIED METHOD: the simplified method uses a wildcard for
4672     # the starting block type and puts all cuddled blocks together:
4673     #    my $rcuddled_block_types = {
4674     #        '*' => {
4675     #            'else'  => 1,
4676     #            'elsif' => 1
4677     #            'catch'   => 1,
4678     #            'finally' => 1
4679     #        },
4680     #    };
4681
4682     # Both methods work, but the simplified method has proven to be adequate and
4683     # easier to manage.
4684
4685     my $cuddled_string = $rOpts->{'cuddled-block-list'};
4686     $cuddled_string = EMPTY_STRING unless $cuddled_string;
4687
4688     my $flags = EMPTY_STRING;
4689     $flags .= "-ce" if ( $rOpts->{'cuddled-else'} );
4690     $flags .= " -cbl='$cuddled_string'";
4691
4692     unless ( $rOpts->{'cuddled-else'} ) {
4693         $flags .= "\nNote: You must specify -ce to generate a cuddled hash";
4694     }
4695
4696     $fh->print(<<EOM);
4697 ------------------------------------------------------------------------
4698 Hash of cuddled block types prepared for a run with these parameters:
4699   $flags
4700 ------------------------------------------------------------------------
4701 EOM
4702
4703     use Data::Dumper;
4704     $fh->print( Dumper($rcuddled_block_types) );
4705
4706     $fh->print(<<EOM);
4707 ------------------------------------------------------------------------
4708 EOM
4709     return;
4710 } ## end sub dump_cuddled_block_list
4711
4712 sub make_static_block_comment_pattern {
4713
4714     # create the pattern used to identify static block comments
4715     $static_block_comment_pattern = '^\s*##';
4716
4717     # allow the user to change it
4718     if ( $rOpts->{'static-block-comment-prefix'} ) {
4719         my $prefix = $rOpts->{'static-block-comment-prefix'};
4720         $prefix =~ s/^\s*//;
4721         my $pattern = $prefix;
4722
4723         # user may give leading caret to force matching left comments only
4724         if ( $prefix !~ /^\^#/ ) {
4725             if ( $prefix !~ /^#/ ) {
4726                 Die(
4727 "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"
4728                 );
4729             }
4730             $pattern = '^\s*' . $prefix;
4731         }
4732         if ( bad_pattern($pattern) ) {
4733             Die(
4734 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"
4735             );
4736         }
4737         $static_block_comment_pattern = $pattern;
4738     }
4739     return;
4740 } ## end sub make_static_block_comment_pattern
4741
4742 sub make_format_skipping_pattern {
4743     my ( $opt_name, $default ) = @_;
4744     my $param = $rOpts->{$opt_name};
4745     unless ($param) { $param = $default }
4746     $param =~ s/^\s*//;
4747     if ( $param !~ /^#/ ) {
4748         Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
4749     }
4750     my $pattern = '^' . $param . '\s';
4751     if ( bad_pattern($pattern) ) {
4752         Die(
4753 "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
4754         );
4755     }
4756     return $pattern;
4757 } ## end sub make_format_skipping_pattern
4758
4759 sub make_non_indenting_brace_pattern {
4760
4761     # Create the pattern used to identify static side comments.
4762     # Note that we are ending the pattern in a \s. This will allow
4763     # the pattern to be followed by a space and some text, or a newline.
4764     # The pattern is used in sub 'non_indenting_braces'
4765     $non_indenting_brace_pattern = '^#<<<\s';
4766
4767     # allow the user to change it
4768     if ( $rOpts->{'non-indenting-brace-prefix'} ) {
4769         my $prefix = $rOpts->{'non-indenting-brace-prefix'};
4770         $prefix =~ s/^\s*//;
4771         if ( $prefix !~ /^#/ ) {
4772             Die("ERROR: the -nibp parameter '$prefix' must begin with '#'\n");
4773         }
4774         my $pattern = '^' . $prefix . '\s';
4775         if ( bad_pattern($pattern) ) {
4776             Die(
4777 "ERROR: the -nibp prefix '$prefix' causes the invalid regex '$pattern'\n"
4778             );
4779         }
4780         $non_indenting_brace_pattern = $pattern;
4781     }
4782     return;
4783 } ## end sub make_non_indenting_brace_pattern
4784
4785 sub make_closing_side_comment_list_pattern {
4786
4787     # turn any input list into a regex for recognizing selected block types
4788     $closing_side_comment_list_pattern = '^\w+';
4789     if ( defined( $rOpts->{'closing-side-comment-list'} )
4790         && $rOpts->{'closing-side-comment-list'} )
4791     {
4792         $closing_side_comment_list_pattern =
4793           make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
4794     }
4795     return;
4796 } ## end sub make_closing_side_comment_list_pattern
4797
4798 sub make_sub_matching_pattern {
4799
4800     # Patterns for standardizing matches to block types for regular subs and
4801     # anonymous subs. Examples
4802     #  'sub process' is a named sub
4803     #  'sub ::m' is a named sub
4804     #  'sub' is an anonymous sub
4805     #  'sub:' is a label, not a sub
4806     #  'sub :' is a label, not a sub   ( block type will be <sub:> )
4807     #   sub'_ is a named sub           ( block type will be <sub '_> )
4808     #  'substr' is a keyword
4809     # So note that named subs always have a space after 'sub'
4810     $SUB_PATTERN  = '^sub\s';    # match normal sub
4811     $ASUB_PATTERN = '^sub$';     # match anonymous sub
4812
4813     # Note (see also RT #133130): These patterns are used by
4814     # sub make_block_pattern, which is used for making most patterns.
4815     # So this sub needs to be called before other pattern-making routines.
4816
4817     if ( $rOpts->{'sub-alias-list'} ) {
4818
4819         # Note that any 'sub-alias-list' has been preprocessed to
4820         # be a trimmed, space-separated list which includes 'sub'
4821         # for example, it might be 'sub method fun'
4822         my $sub_alias_list = $rOpts->{'sub-alias-list'};
4823         $sub_alias_list =~ s/\s+/\|/g;
4824         $SUB_PATTERN    =~ s/sub/\($sub_alias_list\)/;
4825         $ASUB_PATTERN   =~ s/sub/\($sub_alias_list\)/;
4826     }
4827     return;
4828 } ## end sub make_sub_matching_pattern
4829
4830 sub make_bl_pattern {
4831
4832     # Set defaults lists to retain historical default behavior for -bl:
4833     my $bl_list_string           = '*';
4834     my $bl_exclusion_list_string = 'sort map grep eval asub';
4835
4836     if ( defined( $rOpts->{'brace-left-list'} )
4837         && $rOpts->{'brace-left-list'} )
4838     {
4839         $bl_list_string = $rOpts->{'brace-left-list'};
4840     }
4841     if ( $bl_list_string =~ /\bsub\b/ ) {
4842         $rOpts->{'opening-sub-brace-on-new-line'} ||=
4843           $rOpts->{'opening-brace-on-new-line'};
4844     }
4845     if ( $bl_list_string =~ /\basub\b/ ) {
4846         $rOpts->{'opening-anonymous-sub-brace-on-new-line'} ||=
4847           $rOpts->{'opening-brace-on-new-line'};
4848     }
4849
4850     $bl_pattern = make_block_pattern( '-bll', $bl_list_string );
4851
4852     # for -bl, a list with '*' turns on -sbl and -asbl
4853     if ( $bl_pattern =~ /\.\*/ ) {
4854         $rOpts->{'opening-sub-brace-on-new-line'} ||=
4855           $rOpts->{'opening-brace-on-new-line'};
4856         $rOpts->{'opening-anonymous-sub-brace-on-new-line'} ||=
4857           $rOpts->{'opening-anonymous-brace-on-new-line'};
4858     }
4859
4860     if ( defined( $rOpts->{'brace-left-exclusion-list'} )
4861         && $rOpts->{'brace-left-exclusion-list'} )
4862     {
4863         $bl_exclusion_list_string = $rOpts->{'brace-left-exclusion-list'};
4864         if ( $bl_exclusion_list_string =~ /\bsub\b/ ) {
4865             $rOpts->{'opening-sub-brace-on-new-line'} = 0;
4866         }
4867         if ( $bl_exclusion_list_string =~ /\basub\b/ ) {
4868             $rOpts->{'opening-anonymous-sub-brace-on-new-line'} = 0;
4869         }
4870     }
4871
4872     $bl_exclusion_pattern =
4873       make_block_pattern( '-blxl', $bl_exclusion_list_string );
4874     return;
4875 } ## end sub make_bl_pattern
4876
4877 sub make_bli_pattern {
4878
4879     # default list of block types for which -bli would apply
4880     my $bli_list_string = 'if else elsif unless while for foreach do : sub';
4881     my $bli_exclusion_list_string = SPACE;
4882
4883     if ( defined( $rOpts->{'brace-left-and-indent-list'} )
4884         && $rOpts->{'brace-left-and-indent-list'} )
4885     {
4886         $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
4887     }
4888
4889     $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
4890
4891     if ( defined( $rOpts->{'brace-left-and-indent-exclusion-list'} )
4892         && $rOpts->{'brace-left-and-indent-exclusion-list'} )
4893     {
4894         $bli_exclusion_list_string =
4895           $rOpts->{'brace-left-and-indent-exclusion-list'};
4896     }
4897     $bli_exclusion_pattern =
4898       make_block_pattern( '-blixl', $bli_exclusion_list_string );
4899     return;
4900 } ## end sub make_bli_pattern
4901
4902 sub make_keyword_group_list_pattern {
4903
4904     # turn any input list into a regex for recognizing selected block types.
4905     # Here are the defaults:
4906     $keyword_group_list_pattern         = '^(our|local|my|use|require|)$';
4907     $keyword_group_list_comment_pattern = EMPTY_STRING;
4908     if ( defined( $rOpts->{'keyword-group-blanks-list'} )
4909         && $rOpts->{'keyword-group-blanks-list'} )
4910     {
4911         my @words = split /\s+/, $rOpts->{'keyword-group-blanks-list'};
4912         my @keyword_list;
4913         my @comment_list;
4914         foreach my $word (@words) {
4915             if ( $word eq 'BC' || $word eq 'SBC' ) {
4916                 push @comment_list, $word;
4917                 if ( $word eq 'SBC' ) { push @comment_list, 'SBCX' }
4918             }
4919             else {
4920                 push @keyword_list, $word;
4921             }
4922         }
4923         $keyword_group_list_pattern =
4924           make_block_pattern( '-kgbl', $rOpts->{'keyword-group-blanks-list'} );
4925         $keyword_group_list_comment_pattern =
4926           make_block_pattern( '-kgbl', join( SPACE, @comment_list ) );
4927     }
4928     return;
4929 } ## end sub make_keyword_group_list_pattern
4930
4931 sub make_block_brace_vertical_tightness_pattern {
4932
4933     # turn any input list into a regex for recognizing selected block types
4934     $block_brace_vertical_tightness_pattern =
4935       '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
4936     if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
4937         && $rOpts->{'block-brace-vertical-tightness-list'} )
4938     {
4939         $block_brace_vertical_tightness_pattern =
4940           make_block_pattern( '-bbvtl',
4941             $rOpts->{'block-brace-vertical-tightness-list'} );
4942     }
4943     return;
4944 } ## end sub make_block_brace_vertical_tightness_pattern
4945
4946 sub make_blank_line_pattern {
4947
4948     $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
4949     my $key = 'blank-lines-before-closing-block-list';
4950     if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
4951         $blank_lines_before_closing_block_pattern =
4952           make_block_pattern( '-blbcl', $rOpts->{$key} );
4953     }
4954
4955     $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
4956     $key = 'blank-lines-after-opening-block-list';
4957     if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
4958         $blank_lines_after_opening_block_pattern =
4959           make_block_pattern( '-blaol', $rOpts->{$key} );
4960     }
4961     return;
4962 } ## end sub make_blank_line_pattern
4963
4964 sub make_block_pattern {
4965
4966     #  given a string of block-type keywords, return a regex to match them
4967     #  The only tricky part is that labels are indicated with a single ':'
4968     #  and the 'sub' token text may have additional text after it (name of
4969     #  sub).
4970     #
4971     #  Example:
4972     #
4973     #   input string: "if else elsif unless while for foreach do : sub";
4974     #   pattern:  '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
4975
4976     #  Minor Update:
4977     #
4978     #  To distinguish between anonymous subs and named subs, use 'sub' to
4979     #   indicate a named sub, and 'asub' to indicate an anonymous sub
4980
4981     my ( $abbrev, $string ) = @_;
4982     my @list  = split_words($string);
4983     my @words = ();
4984     my %seen;
4985     for my $i (@list) {
4986         if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
4987         next if $seen{$i};
4988         $seen{$i} = 1;
4989         if ( $i eq 'sub' ) {
4990         }
4991         elsif ( $i eq 'asub' ) {
4992         }
4993         elsif ( $i eq ';' ) {
4994             push @words, ';';
4995         }
4996         elsif ( $i eq '{' ) {
4997             push @words, '\{';
4998         }
4999         elsif ( $i eq ':' ) {
5000             push @words, '\w+:';
5001         }
5002         elsif ( $i =~ /^\w/ ) {
5003             push @words, $i;
5004         }
5005         else {
5006             Warn("unrecognized block type $i after $abbrev, ignoring\n");
5007         }
5008     }
5009
5010     # Fix 2 for c091, prevent the pattern from matching an empty string
5011     # '1 ' is an impossible block name.
5012     if ( !@words ) { push @words, "1 " }
5013
5014     my $pattern      = '(' . join( '|', @words ) . ')$';
5015     my $sub_patterns = EMPTY_STRING;
5016     if ( $seen{'sub'} ) {
5017         $sub_patterns .= '|' . $SUB_PATTERN;
5018     }
5019     if ( $seen{'asub'} ) {
5020         $sub_patterns .= '|' . $ASUB_PATTERN;
5021     }
5022     if ($sub_patterns) {
5023         $pattern = '(' . $pattern . $sub_patterns . ')';
5024     }
5025     $pattern = '^' . $pattern;
5026     return $pattern;
5027 } ## end sub make_block_pattern
5028
5029 sub make_static_side_comment_pattern {
5030
5031     # create the pattern used to identify static side comments
5032     $static_side_comment_pattern = '^##';
5033
5034     # allow the user to change it
5035     if ( $rOpts->{'static-side-comment-prefix'} ) {
5036         my $prefix = $rOpts->{'static-side-comment-prefix'};
5037         $prefix =~ s/^\s*//;
5038         my $pattern = '^' . $prefix;
5039         if ( bad_pattern($pattern) ) {
5040             Die(
5041 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"
5042             );
5043         }
5044         $static_side_comment_pattern = $pattern;
5045     }
5046     return;
5047 } ## end sub make_static_side_comment_pattern
5048
5049 sub make_closing_side_comment_prefix {
5050
5051     # Be sure we have a valid closing side comment prefix
5052     my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
5053     my $csc_prefix_pattern;
5054     if ( !defined($csc_prefix) ) {
5055         $csc_prefix         = '## end';
5056         $csc_prefix_pattern = '^##\s+end';
5057     }
5058     else {
5059         my $test_csc_prefix = $csc_prefix;
5060         if ( $test_csc_prefix !~ /^#/ ) {
5061             $test_csc_prefix = '#' . $test_csc_prefix;
5062         }
5063
5064         # make a regex to recognize the prefix
5065         my $test_csc_prefix_pattern = $test_csc_prefix;
5066
5067         # escape any special characters
5068         $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
5069
5070         $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
5071
5072         # allow exact number of intermediate spaces to vary
5073         $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
5074
5075         # make sure we have a good pattern
5076         # if we fail this we probably have an error in escaping
5077         # characters.
5078
5079         if ( bad_pattern($test_csc_prefix_pattern) ) {
5080
5081             # shouldn't happen..must have screwed up escaping, above
5082             if (DEVEL_MODE) {
5083                 Fault(<<EOM);
5084 Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'
5085 EOM
5086             }
5087
5088             # just warn and keep going with defaults
5089             Warn(
5090 "Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"
5091             );
5092             Warn("Please consider using a simpler -cscp prefix\n");
5093             Warn("Using default -cscp instead; please check output\n");
5094         }
5095         else {
5096             $csc_prefix         = $test_csc_prefix;
5097             $csc_prefix_pattern = $test_csc_prefix_pattern;
5098         }
5099     }
5100     $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
5101     $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
5102     return;
5103 } ## end sub make_closing_side_comment_prefix
5104
5105 ##################################################
5106 # CODE SECTION 4: receive lines from the tokenizer
5107 ##################################################
5108
5109 {    ## begin closure write_line
5110
5111     my $nesting_depth;
5112
5113     # Variables used by sub check_sequence_numbers:
5114     my $last_seqno;
5115     my %saw_opening_seqno;
5116     my %saw_closing_seqno;
5117     my $initial_seqno;
5118
5119     sub initialize_write_line {
5120
5121         $nesting_depth = undef;
5122
5123         $last_seqno        = SEQ_ROOT;
5124         %saw_opening_seqno = ();
5125         %saw_closing_seqno = ();
5126
5127         return;
5128     } ## end sub initialize_write_line
5129
5130     sub check_sequence_numbers {
5131
5132         # Routine for checking sequence numbers.  This only needs to be
5133         # done occasionally in DEVEL_MODE to be sure everything is working
5134         # correctly.
5135         my ( $rtokens, $rtoken_type, $rtype_sequence, $input_line_no ) = @_;
5136         my $jmax = @{$rtokens} - 1;
5137         return unless ( $jmax >= 0 );
5138         foreach my $j ( 0 .. $jmax ) {
5139             my $seqno = $rtype_sequence->[$j];
5140             my $token = $rtokens->[$j];
5141             my $type  = $rtoken_type->[$j];
5142             $seqno = EMPTY_STRING unless ( defined($seqno) );
5143             my $err_msg =
5144 "Error at j=$j, line number $input_line_no, seqno='$seqno', type='$type', tok='$token':\n";
5145
5146             if ( !$seqno ) {
5147
5148            # Sequence numbers are generated for opening tokens, so every opening
5149            # token should be sequenced.  Closing tokens will be unsequenced
5150            # if they do not have a matching opening token.
5151                 if (   $is_opening_sequence_token{$token}
5152                     && $type ne 'q'
5153                     && $type ne 'Q' )
5154                 {
5155                     Fault(
5156                         <<EOM
5157 $err_msg Unexpected opening token without sequence number
5158 EOM
5159                     );
5160                 }
5161             }
5162             else {
5163
5164                 # Save starting seqno to identify sequence method:
5165                 # New method starts with 2 and has continuous numbering
5166                 # Old method starts with >2 and may have gaps
5167                 if ( !defined($initial_seqno) ) { $initial_seqno = $seqno }
5168
5169                 if ( $is_opening_sequence_token{$token} ) {
5170
5171                     # New method should have continuous numbering
5172                     if ( $initial_seqno == 2 && $seqno != $last_seqno + 1 ) {
5173                         Fault(
5174                             <<EOM
5175 $err_msg Unexpected opening sequence number: previous seqno=$last_seqno, but seqno= $seqno
5176 EOM
5177                         );
5178                     }
5179                     $last_seqno = $seqno;
5180
5181                     # Numbers must be unique
5182                     if ( $saw_opening_seqno{$seqno} ) {
5183                         my $lno = $saw_opening_seqno{$seqno};
5184                         Fault(
5185                             <<EOM
5186 $err_msg Already saw an opening tokens at line $lno with this sequence number
5187 EOM
5188                         );
5189                     }
5190                     $saw_opening_seqno{$seqno} = $input_line_no;
5191                 }
5192
5193                 # only one closing item per seqno
5194                 elsif ( $is_closing_sequence_token{$token} ) {
5195                     if ( $saw_closing_seqno{$seqno} ) {
5196                         my $lno = $saw_closing_seqno{$seqno};
5197                         Fault(
5198                             <<EOM
5199 $err_msg Already saw a closing token with this seqno  at line $lno
5200 EOM
5201                         );
5202                     }
5203                     $saw_closing_seqno{$seqno} = $input_line_no;
5204
5205                     # Every closing seqno must have an opening seqno
5206                     if ( !$saw_opening_seqno{$seqno} ) {
5207                         Fault(
5208                             <<EOM
5209 $err_msg Saw a closing token but no opening token with this seqno
5210 EOM
5211                         );
5212                     }
5213                 }
5214
5215                 # Sequenced items must be opening or closing
5216                 else {
5217                     Fault(
5218                         <<EOM
5219 $err_msg Unexpected token type with a sequence number
5220 EOM
5221                     );
5222                 }
5223             }
5224         }
5225         return;
5226     } ## end sub check_sequence_numbers
5227
5228     sub write_line {
5229
5230         # This routine receives lines one-by-one from the tokenizer and stores
5231         # them in a format suitable for further processing.  After the last
5232         # line has been sent, the tokenizer will call sub 'finish_formatting'
5233         # to do the actual formatting.
5234
5235         my ( $self, $line_of_tokens_old ) = @_;
5236         my $rLL        = $self->[_rLL_];
5237         my $Klimit     = $self->[_Klimit_];
5238         my $rlines_new = $self->[_rlines_];
5239
5240         my $K_opening_container     = $self->[_K_opening_container_];
5241         my $K_closing_container     = $self->[_K_closing_container_];
5242         my $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
5243         my $rblock_type_of_seqno    = $self->[_rblock_type_of_seqno_];
5244         my $rSS                     = $self->[_rSS_];
5245         my $Iss_opening             = $self->[_Iss_opening_];
5246         my $Iss_closing             = $self->[_Iss_closing_];
5247
5248         my $Kfirst;
5249         my $line_of_tokens = {};
5250         foreach (
5251             qw(
5252             _curly_brace_depth
5253             _ending_in_quote
5254             _guessed_indentation_level
5255             _line_number
5256             _line_text
5257             _line_type
5258             _paren_depth
5259             _quote_character
5260             _square_bracket_depth
5261             _starting_in_quote
5262             )
5263           )
5264         {
5265             $line_of_tokens->{$_} = $line_of_tokens_old->{$_};
5266         }
5267
5268         # Data needed by Logger
5269         $line_of_tokens->{_level_0}          = 0;
5270         $line_of_tokens->{_ci_level_0}       = 0;
5271         $line_of_tokens->{_nesting_blocks_0} = EMPTY_STRING;
5272         $line_of_tokens->{_nesting_tokens_0} = EMPTY_STRING;
5273
5274         # Needed to avoid trimming quotes
5275         $line_of_tokens->{_ended_in_blank_token} = undef;
5276
5277         my $line_type   = $line_of_tokens_old->{_line_type};
5278         my $line_number = $line_of_tokens_old->{_line_number};
5279         my $CODE_type   = EMPTY_STRING;
5280         my $tee_output;
5281
5282         # Handle line of non-code
5283         if ( $line_type ne 'CODE' ) {
5284             $tee_output ||= $rOpts_tee_pod
5285               && substr( $line_type, 0, 3 ) eq 'POD';
5286         }
5287
5288         # Handle line of code
5289         else {
5290
5291             my $rtokens        = $line_of_tokens_old->{_rtokens};
5292             my $rtoken_type    = $line_of_tokens_old->{_rtoken_type};
5293             my $rblock_type    = $line_of_tokens_old->{_rblock_type};
5294             my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
5295             my $rlevels        = $line_of_tokens_old->{_rlevels};
5296             my $rci_levels     = $line_of_tokens_old->{_rci_levels};
5297
5298             my $jmax = @{$rtokens} - 1;
5299             if ( $jmax >= 0 ) {
5300                 $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
5301
5302                 DEVEL_MODE
5303                   && check_sequence_numbers( $rtokens, $rtoken_type,
5304                     $rtype_sequence, $line_number );
5305
5306                 # Find the starting nesting depth ...
5307                 # It must be the value of variable 'level' of the first token
5308                 # because the nesting depth is used as a token tag in the
5309                 # vertical aligner and is compared to actual levels.
5310                 # So vertical alignment problems will occur with any other
5311                 # starting value.
5312                 if ( !defined($nesting_depth) ) {
5313                     $nesting_depth = $rlevels->[0];
5314                     $nesting_depth = 0 if ( $nesting_depth < 0 );
5315                     $rdepth_of_opening_seqno->[SEQ_ROOT] = $nesting_depth - 1;
5316                 }
5317
5318                 foreach my $j ( 0 .. $jmax ) {
5319
5320                     # Do not clip the 'level' variable yet. We will do this
5321                     # later, in sub 'store_token_to_go'. The reason is that in
5322                     # files with level errors, the logic in 'weld_cuddled_else'
5323                     # uses a stack logic that will give bad welds if we clip
5324                     # levels here.
5325                     ## if ( $rlevels->[$j] < 0 ) { $rlevels->[$j] = 0 }
5326
5327                     # Handle tokens with sequence numbers ...
5328                     my $seqno = $rtype_sequence->[$j];
5329                     if ($seqno) {
5330                         my $token = $rtokens->[$j];
5331                         my $sign  = 1;
5332                         if ( $is_opening_token{$token} ) {
5333                             $K_opening_container->{$seqno} = @{$rLL};
5334                             $rdepth_of_opening_seqno->[$seqno] = $nesting_depth;
5335                             $nesting_depth++;
5336
5337                             # Save a sequenced block type at its opening token.
5338                             # Note that unsequenced block types can occur in
5339                             # unbalanced code with errors but are ignored here.
5340                             if ( $rblock_type->[$j] ) {
5341                                 my $block_type = $rblock_type->[$j];
5342                                 $rblock_type_of_seqno->{$seqno} = $block_type;
5343                                 if ( substr( $block_type, 0, 3 ) eq 'sub'
5344                                     || $rOpts_sub_alias_list )
5345                                 {
5346                                     if ( $block_type =~ /$ASUB_PATTERN/ ) {
5347                                         $self->[_ris_asub_block_]->{$seqno} = 1;
5348                                     }
5349                                     elsif ( $block_type =~ /$SUB_PATTERN/ ) {
5350                                         $self->[_ris_sub_block_]->{$seqno} = 1;
5351                                     }
5352                                 }
5353                             }
5354                         }
5355                         elsif ( $is_closing_token{$token} ) {
5356
5357                             # The opening depth should always be defined, and
5358                             # it should equal $nesting_depth-1.  To protect
5359                             # against unforseen error conditions, however, we
5360                             # will check this and fix things if necessary.  For
5361                             # a test case see issue c055.
5362                             my $opening_depth =
5363                               $rdepth_of_opening_seqno->[$seqno];
5364                             if ( !defined($opening_depth) ) {
5365                                 $opening_depth = $nesting_depth - 1;
5366                                 $opening_depth = 0 if ( $opening_depth < 0 );
5367                                 $rdepth_of_opening_seqno->[$seqno] =
5368                                   $opening_depth;
5369
5370                                 # This is not fatal but should not happen.  The
5371                                 # tokenizer generates sequence numbers
5372                                 # incrementally upon encountering each new
5373                                 # opening token, so every positive sequence
5374                                 # number should correspond to an opening token.
5375                                 if (DEVEL_MODE) {
5376                                     Fault(<<EOM);
5377 No opening token seen for closing token = '$token' at seq=$seqno at depth=$opening_depth
5378 EOM
5379                                 }
5380                             }
5381                             $K_closing_container->{$seqno} = @{$rLL};
5382                             $nesting_depth                 = $opening_depth;
5383                             $sign                          = -1;
5384                         }
5385                         elsif ( $token eq '?' ) {
5386                         }
5387                         elsif ( $token eq ':' ) {
5388                             $sign = -1;
5389                         }
5390
5391                         # The only sequenced types output by the tokenizer are
5392                         # the opening & closing containers and the ternary
5393                         # types. So we would only get here if the tokenizer has
5394                         # been changed to mark some other tokens with sequence
5395                         # numbers, or if an error has been introduced in a
5396                         # hash such as %is_opening_container
5397                         else {
5398                             if (DEVEL_MODE) {
5399                                 Fault(<<EOM);
5400 Unexpected sequenced token '$token' of type '$rtoken_type->[$j]', sequence=$seqno arrived from tokenizer.
5401 Expecting only opening or closing container tokens or ternary tokens with sequence numbers.
5402 EOM
5403                             }
5404                         }
5405
5406                         if ( $sign > 0 ) {
5407                             $Iss_opening->[$seqno] = @{$rSS};
5408
5409                             # For efficiency, we find the maximum level of
5410                             # opening tokens of any type.  The actual maximum
5411                             # level will be that of their contents which is 1
5412                             # greater.  That will be fixed in sub
5413                             # 'finish_formatting'.
5414                             my $level = $rlevels->[$j];
5415                             if ( $level > $self->[_maximum_level_] ) {
5416                                 $self->[_maximum_level_]         = $level;
5417                                 $self->[_maximum_level_at_line_] = $line_number;
5418                             }
5419                         }
5420                         else { $Iss_closing->[$seqno] = @{$rSS} }
5421                         push @{$rSS}, $sign * $seqno;
5422
5423                     }
5424                     else {
5425                         $seqno = EMPTY_STRING unless ( defined($seqno) );
5426                     }
5427
5428                     my @tokary;
5429                     @tokary[
5430                       _TOKEN_, _TYPE_,     _TYPE_SEQUENCE_,
5431                       _LEVEL_, _CI_LEVEL_, _LINE_INDEX_,
5432                       ]
5433                       = (
5434                         $rtokens->[$j],    $rtoken_type->[$j],
5435                         $seqno,            $rlevels->[$j],
5436                         $rci_levels->[$j], $line_number - 1,
5437                       );
5438                     push @{$rLL}, \@tokary;
5439                 } ## end foreach my $j ( 0 .. $jmax )
5440
5441                 $Klimit = @{$rLL} - 1;
5442
5443                 # Need to remember if we can trim the input line
5444                 $line_of_tokens->{_ended_in_blank_token} =
5445                   $rtoken_type->[$jmax] eq 'b';
5446
5447                 $line_of_tokens->{_level_0}    = $rlevels->[0];
5448                 $line_of_tokens->{_ci_level_0} = $rci_levels->[0];
5449                 $line_of_tokens->{_nesting_blocks_0} =
5450                   $line_of_tokens_old->{_nesting_blocks_0};
5451                 $line_of_tokens->{_nesting_tokens_0} =
5452                   $line_of_tokens_old->{_nesting_tokens_0};
5453
5454             } ## end if ( $jmax >= 0 )
5455
5456             $tee_output ||=
5457                  $rOpts_tee_block_comments
5458               && $jmax == 0
5459               && $rLL->[$Kfirst]->[_TYPE_] eq '#';
5460
5461             $tee_output ||=
5462                  $rOpts_tee_side_comments
5463               && defined($Kfirst)
5464               && $Klimit > $Kfirst
5465               && $rLL->[$Klimit]->[_TYPE_] eq '#';
5466
5467         } ## end if ( $line_type eq 'CODE')
5468
5469         # Finish storing line variables
5470         if ($tee_output) {
5471             my $fh_tee    = $self->[_fh_tee_];
5472             my $line_text = $line_of_tokens_old->{_line_text};
5473             $fh_tee->print($line_text) if ($fh_tee);
5474         }
5475
5476         $line_of_tokens->{_rK_range}  = [ $Kfirst, $Klimit ];
5477         $line_of_tokens->{_code_type} = $CODE_type;
5478         $self->[_Klimit_]             = $Klimit;
5479
5480         push @{$rlines_new}, $line_of_tokens;
5481         return;
5482     } ## end sub write_line
5483 } ## end closure write_line
5484
5485 #############################################
5486 # CODE SECTION 5: Pre-process the entire file
5487 #############################################
5488
5489 sub finish_formatting {
5490
5491     my ( $self, $severe_error ) = @_;
5492
5493     # The file has been tokenized and is ready to be formatted.
5494     # All of the relevant data is stored in $self, ready to go.
5495
5496     # Check the maximum level. If it is extremely large we will give up and
5497     # output the file verbatim.  Note that the actual maximum level is 1
5498     # greater than the saved value, so we fix that here.
5499     $self->[_maximum_level_] += 1;
5500     my $maximum_level       = $self->[_maximum_level_];
5501     my $maximum_table_index = $#maximum_line_length_at_level;
5502     if ( !$severe_error && $maximum_level >= $maximum_table_index ) {
5503         $severe_error ||= 1;
5504         Warn(<<EOM);
5505 The maximum indentation level, $maximum_level, exceeds the builtin limit of $maximum_table_index.
5506 Something may be wrong; formatting will be skipped.
5507 EOM
5508     }
5509
5510     # output file verbatim if severe error or no formatting requested
5511     if ( $severe_error || $rOpts->{notidy} ) {
5512         $self->dump_verbatim();
5513         $self->wrapup();
5514         return;
5515     }
5516
5517     # Update the 'save_logfile' flag based to include any tokenization errors.
5518     # We can save time by skipping logfile calls if it is not going to be saved.
5519     my $logger_object = $self->[_logger_object_];
5520     if ($logger_object) {
5521         $self->[_save_logfile_] = $logger_object->get_save_logfile();
5522     }
5523
5524     my $rix_side_comments = $self->set_CODE_type();
5525
5526     $self->find_non_indenting_braces($rix_side_comments);
5527
5528     # Handle any requested side comment deletions. It is easier to get
5529     # this done here rather than farther down the pipeline because IO
5530     # lines take a different route, and because lines with deleted HSC
5531     # become BL lines.  We have already handled any tee requests in sub
5532     # getline, so it is safe to delete side comments now.
5533     $self->delete_side_comments($rix_side_comments)
5534       if ( $rOpts_delete_side_comments
5535         || $rOpts_delete_closing_side_comments );
5536
5537     # Verify that the line hash does not have any unknown keys.
5538     $self->check_line_hashes() if (DEVEL_MODE);
5539
5540     # Make a pass through all tokens, adding or deleting any whitespace as
5541     # required.  Also make any other changes, such as adding semicolons.
5542     # All token changes must be made here so that the token data structure
5543     # remains fixed for the rest of this iteration.
5544     $self->respace_tokens();
5545
5546     $self->set_excluded_lp_containers();
5547
5548     $self->find_multiline_qw();
5549
5550     $self->keep_old_line_breaks();
5551
5552     # Implement any welding needed for the -wn or -cb options
5553     $self->weld_containers();
5554
5555     $self->collapsed_lengths()
5556       if ( $rOpts_line_up_parentheses && $rOpts_extended_line_up_parentheses );
5557
5558     # Locate small nested blocks which should not be broken
5559     $self->mark_short_nested_blocks();
5560
5561     $self->adjust_indentation_levels();
5562
5563     # Verify that the main token array looks OK.  If this ever causes a fault
5564     # then place similar checks before the sub calls above to localize the
5565     # problem.
5566     $self->check_rLL("Before 'process_all_lines'") if (DEVEL_MODE);
5567
5568     # Finishes formatting and write the result to the line sink.
5569     # Eventually this call should just change the 'rlines' data according to the
5570     # new line breaks and then return so that we can do an internal iteration
5571     # before continuing with the next stages of formatting.
5572     $self->process_all_lines();
5573
5574     # A final routine to tie up any loose ends
5575     $self->wrapup();
5576     return;
5577 } ## end sub finish_formatting
5578
5579 sub set_CODE_type {
5580     my ($self) = @_;
5581
5582     # Examine each line of code and set a flag '$CODE_type' to describe it.
5583     # Also return a list of lines with side comments.
5584
5585     my $rLL                  = $self->[_rLL_];
5586     my $Klimit               = $self->[_Klimit_];
5587     my $rlines               = $self->[_rlines_];
5588     my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
5589
5590     my $rOpts_format_skipping_begin = $rOpts->{'format-skipping-begin'};
5591     my $rOpts_format_skipping_end   = $rOpts->{'format-skipping-end'};
5592     my $rOpts_static_block_comment_prefix =
5593       $rOpts->{'static-block-comment-prefix'};
5594
5595     # Remember indexes of lines with side comments
5596     my @ix_side_comments;
5597
5598     my $In_format_skipping_section = 0;
5599     my $Saw_VERSION_in_this_file   = 0;
5600     my $has_side_comment           = 0;
5601     my ( $Kfirst, $Klast );
5602     my $CODE_type;
5603
5604     # Loop to set CODE_type
5605
5606     # Possible CODE_types
5607     # 'VB'  = Verbatim - line goes out verbatim (a quote)
5608     # 'FS'  = Format Skipping - line goes out verbatim
5609     # 'BL'  = Blank Line
5610     # 'HSC' = Hanging Side Comment - fix this hanging side comment
5611     # 'SBCX'= Static Block Comment Without Leading Space
5612     # 'SBC' = Static Block Comment
5613     # 'BC'  = Block Comment - an ordinary full line comment
5614     # 'IO'  = Indent Only - line goes out unchanged except for indentation
5615     # 'NIN' = No Internal Newlines - line does not get broken
5616     # 'VER' = VERSION statement
5617     # ''    = ordinary line of code with no restrictions
5618
5619     my $ix_line = -1;
5620     foreach my $line_of_tokens ( @{$rlines} ) {
5621         $ix_line++;
5622         my $input_line_no = $line_of_tokens->{_line_number};
5623         my $line_type     = $line_of_tokens->{_line_type};
5624
5625         my $Last_line_had_side_comment = $has_side_comment;
5626         if ($has_side_comment) {
5627             push @ix_side_comments, $ix_line - 1;
5628         }
5629         $has_side_comment = 0;
5630
5631         next unless ( $line_type eq 'CODE' );
5632
5633         my $Klast_prev = $Klast;
5634
5635         my $rK_range = $line_of_tokens->{_rK_range};
5636         ( $Kfirst, $Klast ) = @{$rK_range};
5637
5638         my $last_CODE_type = $CODE_type;
5639         $CODE_type = EMPTY_STRING;
5640
5641         my $input_line = $line_of_tokens->{_line_text};
5642         my $jmax       = defined($Kfirst) ? $Klast - $Kfirst : -1;
5643
5644         my $is_block_comment = 0;
5645         if ( $jmax >= 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
5646             if   ( $jmax == 0 ) { $is_block_comment = 1; }
5647             else                { $has_side_comment = 1 }
5648         }
5649
5650         # Write line verbatim if we are in a formatting skip section
5651         if ($In_format_skipping_section) {
5652
5653             # Note: extra space appended to comment simplifies pattern matching
5654             if (
5655                 $is_block_comment
5656
5657                 # optional fast pre-check
5658                 && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#>>>'
5659                     || $rOpts_format_skipping_end )
5660
5661                 && ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~
5662                 /$format_skipping_pattern_end/
5663               )
5664             {
5665                 $In_format_skipping_section = 0;
5666                 write_logfile_entry(
5667                     "Line $input_line_no: Exiting format-skipping section\n");
5668             }
5669             $CODE_type = 'FS';
5670             goto NEXT;
5671         }
5672
5673         # Check for a continued quote..
5674         if ( $line_of_tokens->{_starting_in_quote} ) {
5675
5676             # A line which is entirely a quote or pattern must go out
5677             # verbatim.  Note: the \n is contained in $input_line.
5678             if ( $jmax <= 0 ) {
5679                 if ( ( $input_line =~ "\t" ) ) {
5680                     my $input_line_number = $line_of_tokens->{_line_number};
5681                     $self->note_embedded_tab($input_line_number);
5682                 }
5683                 $CODE_type = 'VB';
5684                 goto NEXT;
5685             }
5686         }
5687
5688         # See if we are entering a formatting skip section
5689         if (
5690             $is_block_comment
5691
5692             # optional fast pre-check
5693             && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#<<<'
5694                 || $rOpts_format_skipping_begin )
5695
5696             && $rOpts_format_skipping
5697             && ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~
5698             /$format_skipping_pattern_begin/
5699           )
5700         {
5701             $In_format_skipping_section = 1;
5702             write_logfile_entry(
5703                 "Line $input_line_no: Entering format-skipping section\n");
5704             $CODE_type = 'FS';
5705             goto NEXT;
5706         }
5707
5708         # ignore trailing blank tokens (they will get deleted later)
5709         if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
5710             $jmax--;
5711         }
5712
5713         # blank line..
5714         if ( $jmax < 0 ) {
5715             $CODE_type = 'BL';
5716             goto NEXT;
5717         }
5718
5719         # Handle comments
5720         if ($is_block_comment) {
5721
5722             # see if this is a static block comment (starts with ## by default)
5723             my $is_static_block_comment = 0;
5724             my $no_leading_space        = substr( $input_line, 0, 1 ) eq '#';
5725             if (
5726
5727                 # optional fast pre-check
5728                 (
5729                     substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 2 ) eq '##'
5730                     || $rOpts_static_block_comment_prefix
5731                 )
5732
5733                 && $rOpts_static_block_comments
5734                 && $input_line =~ /$static_block_comment_pattern/
5735               )
5736             {
5737                 $is_static_block_comment = 1;
5738             }
5739
5740             # Check for comments which are line directives
5741             # Treat exactly as static block comments without leading space
5742             # reference: perlsyn, near end, section Plain Old Comments (Not!)
5743             # example: '# line 42 "new_filename.plx"'
5744             if (
5745                    $no_leading_space
5746                 && $input_line =~ /^\#   \s*
5747                            line \s+ (\d+)   \s*
5748                            (?:\s("?)([^"]+)\2)? \s*
5749                            $/x
5750               )
5751             {
5752                 $is_static_block_comment = 1;
5753             }
5754
5755             # look for hanging side comment ...
5756             if (
5757                 $Last_line_had_side_comment    # last line had side comment
5758                 && !$no_leading_space          # there is some leading space
5759                 && !
5760                 $is_static_block_comment    # do not make static comment hanging
5761               )
5762             {
5763
5764                 #  continuing an existing HSC chain?
5765                 if ( $last_CODE_type eq 'HSC' ) {
5766                     $has_side_comment = 1;
5767                     $CODE_type        = 'HSC';
5768                     goto NEXT;
5769                 }
5770
5771                 #  starting a new HSC chain?
5772                 elsif (
5773
5774                     $rOpts->{'hanging-side-comments'}    # user is allowing
5775                                                          # hanging side comments
5776                                                          # like this
5777
5778                     && ( defined($Klast_prev) && $Klast_prev > 1 )
5779
5780                     # and the previous side comment was not static (issue c070)
5781                     && !(
5782                            $rOpts->{'static-side-comments'}
5783                         && $rLL->[$Klast_prev]->[_TOKEN_] =~
5784                         /$static_side_comment_pattern/
5785                     )
5786
5787                   )
5788                 {
5789
5790                     # and it is not a closing side comment (issue c070).
5791                     my $K_penult = $Klast_prev - 1;
5792                     $K_penult -= 1 if ( $rLL->[$K_penult]->[_TYPE_] eq 'b' );
5793                     my $follows_csc =
5794                       (      $rLL->[$K_penult]->[_TOKEN_] eq '}'
5795                           && $rLL->[$K_penult]->[_TYPE_] eq '}'
5796                           && $rLL->[$Klast_prev]->[_TOKEN_] =~
5797                           /$closing_side_comment_prefix_pattern/ );
5798
5799                     if ( !$follows_csc ) {
5800                         $has_side_comment = 1;
5801                         $CODE_type        = 'HSC';
5802                         goto NEXT;
5803                     }
5804                 }
5805             }
5806
5807             if ($is_static_block_comment) {
5808                 $CODE_type = $no_leading_space ? 'SBCX' : 'SBC';
5809                 goto NEXT;
5810             }
5811             elsif ($Last_line_had_side_comment
5812                 && !$rOpts_maximum_consecutive_blank_lines
5813                 && $rLL->[$Kfirst]->[_LEVEL_] > 0 )
5814             {
5815                 # Emergency fix to keep a block comment from becoming a hanging
5816                 # side comment.  This fix is for the case that blank lines
5817                 # cannot be inserted.  There is related code in sub
5818                 # 'process_line_of_CODE'
5819                 $CODE_type = 'SBCX';
5820                 goto NEXT;
5821             }
5822             else {
5823                 $CODE_type = 'BC';
5824                 goto NEXT;
5825             }
5826         }
5827
5828         # End of comments. Handle a line of normal code:
5829
5830         if ($rOpts_indent_only) {
5831             $CODE_type = 'IO';
5832             goto NEXT;
5833         }
5834
5835         if ( !$rOpts_add_newlines ) {
5836             $CODE_type = 'NIN';
5837             goto NEXT;
5838         }
5839
5840         #   Patch needed for MakeMaker.  Do not break a statement
5841         #   in which $VERSION may be calculated.  See MakeMaker.pm;
5842         #   this is based on the coding in it.
5843         #   The first line of a file that matches this will be eval'd:
5844         #       /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
5845         #   Examples:
5846         #     *VERSION = \'1.01';
5847         #     ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
5848         #   We will pass such a line straight through without breaking
5849         #   it unless -npvl is used.
5850
5851         #   Patch for problem reported in RT #81866, where files
5852         #   had been flattened into a single line and couldn't be
5853         #   tidied without -npvl.  There are two parts to this patch:
5854         #   First, it is not done for a really long line (80 tokens for now).
5855         #   Second, we will only allow up to one semicolon
5856         #   before the VERSION.  We need to allow at least one semicolon
5857         #   for statements like this:
5858         #      require Exporter;  our $VERSION = $Exporter::VERSION;
5859         #   where both statements must be on a single line for MakeMaker
5860
5861         if (  !$Saw_VERSION_in_this_file
5862             && $jmax < 80
5863             && $input_line =~
5864             /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
5865         {
5866             $Saw_VERSION_in_this_file = 1;
5867             write_logfile_entry("passing VERSION line; -npvl deactivates\n");
5868
5869             # This code type has lower priority than others
5870             $CODE_type = 'VER';
5871             goto NEXT;
5872         }
5873
5874       NEXT:
5875         $line_of_tokens->{_code_type} = $CODE_type;
5876     }
5877
5878     if ($has_side_comment) {
5879         push @ix_side_comments, $ix_line;
5880     }
5881
5882     return \@ix_side_comments;
5883 } ## end sub set_CODE_type
5884
5885 sub find_non_indenting_braces {
5886
5887     my ( $self, $rix_side_comments ) = @_;
5888     return unless ( $rOpts->{'non-indenting-braces'} );
5889     my $rLL    = $self->[_rLL_];
5890     my $Klimit = $self->[_Klimit_];
5891     return unless ( defined($rLL) && @{$rLL} );
5892     my $rlines               = $self->[_rlines_];
5893     my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
5894     my $rseqno_non_indenting_brace_by_ix =
5895       $self->[_rseqno_non_indenting_brace_by_ix_];
5896
5897     foreach my $ix ( @{$rix_side_comments} ) {
5898         my $line_of_tokens = $rlines->[$ix];
5899         my $line_type      = $line_of_tokens->{_line_type};
5900         if ( $line_type ne 'CODE' ) {
5901
5902             # shouldn't happen
5903             next;
5904         }
5905         my $CODE_type = $line_of_tokens->{_code_type};
5906         my $rK_range  = $line_of_tokens->{_rK_range};
5907         my ( $Kfirst, $Klast ) = @{$rK_range};
5908         unless ( defined($Kfirst) && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
5909
5910             # shouldn't happen
5911             next;
5912         }
5913         next unless ( $Klast > $Kfirst );    # maybe HSC
5914         my $token_sc = $rLL->[$Klast]->[_TOKEN_];
5915         my $K_m      = $Klast - 1;
5916         my $type_m   = $rLL->[$K_m]->[_TYPE_];
5917         if ( $type_m eq 'b' && $K_m > $Kfirst ) {
5918             $K_m--;
5919             $type_m = $rLL->[$K_m]->[_TYPE_];
5920         }
5921         my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
5922         if ($seqno_m) {
5923             my $block_type_m = $rblock_type_of_seqno->{$seqno_m};
5924
5925             # The pattern ends in \s but we have removed the newline, so
5926             # we added it back for the match. That way we require an exact
5927             # match to the special string and also allow additional text.
5928             $token_sc .= "\n";
5929             if (   $block_type_m
5930                 && $is_opening_type{$type_m}
5931                 && $token_sc =~ /$non_indenting_brace_pattern/ )
5932             {
5933                 $rseqno_non_indenting_brace_by_ix->{$ix} = $seqno_m;
5934             }
5935         }
5936     }
5937     return;
5938 } ## end sub find_non_indenting_braces
5939
5940 sub delete_side_comments {
5941     my ( $self, $rix_side_comments ) = @_;
5942
5943     # Given a list of indexes of lines with side comments, handle any
5944     # requested side comment deletions.
5945
5946     my $rLL                  = $self->[_rLL_];
5947     my $rlines               = $self->[_rlines_];
5948     my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
5949     my $rseqno_non_indenting_brace_by_ix =
5950       $self->[_rseqno_non_indenting_brace_by_ix_];
5951
5952     foreach my $ix ( @{$rix_side_comments} ) {
5953         my $line_of_tokens = $rlines->[$ix];
5954         my $line_type      = $line_of_tokens->{_line_type};
5955
5956         # This fault shouldn't happen because we only saved CODE lines with
5957         # side comments in the TASK 1 loop above.
5958         if ( $line_type ne 'CODE' ) {
5959             if (DEVEL_MODE) {
5960                 my $lno = $ix + 1;
5961                 Fault(<<EOM);
5962 Hit unexpected line_type = '$line_type' near line $lno while deleting side comments, should be 'CODE'
5963 EOM
5964             }
5965             next;
5966         }
5967
5968         my $CODE_type = $line_of_tokens->{_code_type};
5969         my $rK_range  = $line_of_tokens->{_rK_range};
5970         my ( $Kfirst, $Klast ) = @{$rK_range};
5971
5972         unless ( defined($Kfirst) && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
5973             if (DEVEL_MODE) {
5974                 my $lno = $ix + 1;
5975                 Fault(<<EOM);
5976 Did not find side comment near line $lno while deleting side comments
5977 EOM
5978             }
5979             next;
5980         }
5981
5982         my $delete_side_comment =
5983              $rOpts_delete_side_comments
5984           && ( $Klast > $Kfirst || $CODE_type eq 'HSC' )
5985           && (!$CODE_type
5986             || $CODE_type eq 'HSC'
5987             || $CODE_type eq 'IO'
5988             || $CODE_type eq 'NIN' );
5989
5990         # Do not delete special control side comments
5991         if ( $rseqno_non_indenting_brace_by_ix->{$ix} ) {
5992             $delete_side_comment = 0;
5993         }
5994
5995         if (
5996                $rOpts_delete_closing_side_comments
5997             && !$delete_side_comment
5998             && $Klast > $Kfirst
5999             && (  !$CODE_type
6000                 || $CODE_type eq 'HSC'
6001                 || $CODE_type eq 'IO'
6002                 || $CODE_type eq 'NIN' )
6003           )
6004         {
6005             my $token  = $rLL->[$Klast]->[_TOKEN_];
6006             my $K_m    = $Klast - 1;
6007             my $type_m = $rLL->[$K_m]->[_TYPE_];
6008             if ( $type_m eq 'b' && $K_m > $Kfirst ) { $K_m-- }
6009             my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
6010             if ($seqno_m) {
6011                 my $block_type_m = $rblock_type_of_seqno->{$seqno_m};
6012                 if (   $block_type_m
6013                     && $token        =~ /$closing_side_comment_prefix_pattern/
6014                     && $block_type_m =~ /$closing_side_comment_list_pattern/ )
6015                 {
6016                     $delete_side_comment = 1;
6017                 }
6018             }
6019         } ## end if ( $rOpts_delete_closing_side_comments...)
6020
6021         if ($delete_side_comment) {
6022
6023             # We are actually just changing the side comment to a blank.
6024             # This may produce multiple blanks in a row, but sub respace_tokens
6025             # will check for this and fix it.
6026             $rLL->[$Klast]->[_TYPE_]  = 'b';
6027             $rLL->[$Klast]->[_TOKEN_] = SPACE;
6028
6029             # The -io option outputs the line text, so we have to update
6030             # the line text so that the comment does not reappear.
6031             if ( $CODE_type eq 'IO' ) {
6032                 my $line = EMPTY_STRING;
6033                 foreach my $KK ( $Kfirst .. $Klast - 1 ) {
6034                     $line .= $rLL->[$KK]->[_TOKEN_];
6035                 }
6036                 $line =~ s/\s+$//;
6037                 $line_of_tokens->{_line_text} = $line . "\n";
6038             }
6039
6040             # If we delete a hanging side comment the line becomes blank.
6041             if ( $CODE_type eq 'HSC' ) { $line_of_tokens->{_code_type} = 'BL' }
6042         }
6043     }
6044     return;
6045 } ## end sub delete_side_comments
6046
6047 sub dump_verbatim {
6048     my $self   = shift;
6049     my $rlines = $self->[_rlines_];
6050     foreach my $line ( @{$rlines} ) {
6051         my $input_line = $line->{_line_text};
6052         $self->write_unindented_line($input_line);
6053     }
6054     return;
6055 }
6056
6057 my %wU;
6058 my %wiq;
6059 my %is_wit;
6060 my %is_sigil;
6061 my %is_nonlist_keyword;
6062 my %is_nonlist_type;
6063 my %is_s_y_m_slash;
6064 my %is_unexpected_equals;
6065
6066 BEGIN {
6067
6068     # added 'U' to fix cases b1125 b1126 b1127
6069     my @q = qw(w U);
6070     @{wU}{@q} = (1) x scalar(@q);
6071
6072     @q = qw(w i q Q G C Z);
6073     @{wiq}{@q} = (1) x scalar(@q);
6074
6075     @q = qw(w i t);
6076     @{is_wit}{@q} = (1) x scalar(@q);
6077
6078     @q = qw($ & % * @);
6079     @{is_sigil}{@q} = (1) x scalar(@q);
6080
6081     # Parens following these keywords will not be marked as lists. Note that
6082     # 'for' is not included and is handled separately, by including 'f' in the
6083     # hash %is_counted_type, since it may or may not be a c-style for loop.
6084     @q = qw( if elsif unless and or );
6085     @is_nonlist_keyword{@q} = (1) x scalar(@q);
6086
6087     # Parens following these types will not be marked as lists
6088     @q = qw( && || );
6089     @is_nonlist_type{@q} = (1) x scalar(@q);
6090
6091     @q = qw( s y m / );
6092     @is_s_y_m_slash{@q} = (1) x scalar(@q);
6093
6094     @q = qw( = == != );
6095     @is_unexpected_equals{@q} = (1) x scalar(@q);
6096
6097 }
6098
6099 sub respace_tokens {
6100
6101     my $self = shift;
6102     return if $rOpts->{'indent-only'};
6103
6104     # This routine is called once per file to do as much formatting as possible
6105     # before new line breaks are set.
6106
6107     # This routine makes all necessary and possible changes to the tokenization
6108     # after the initial tokenization of the file. This is a tedious routine,
6109     # but basically it consists of inserting and deleting whitespace between
6110     # nonblank tokens according to the selected parameters. In a few cases
6111     # non-space characters are added, deleted or modified.
6112
6113     # The goal of this routine is to create a new token array which only needs
6114     # the definition of new line breaks and padding to complete formatting.  In
6115     # a few cases we have to cheat a little to achieve this goal.  In
6116     # particular, we may not know if a semicolon will be needed, because it
6117     # depends on how the line breaks go.  To handle this, we include the
6118     # semicolon as a 'phantom' which can be displayed as normal or as an empty
6119     # string.
6120
6121     # Method: The old tokens are copied one-by-one, with changes, from the old
6122     # linear storage array $rLL to a new array $rLL_new.
6123
6124     my $rLL             = $self->[_rLL_];
6125     my $Klimit_old      = $self->[_Klimit_];
6126     my $rlines          = $self->[_rlines_];
6127     my $length_function = $self->[_length_function_];
6128     my $is_encoded_data = $self->[_is_encoded_data_];
6129
6130     my $rLL_new = [];    # This is the new array
6131     my $rtoken_vars;
6132     my $Ktoken_vars;                   # the old K value of $rtoken_vars
6133     my ( $Kfirst_old, $Klast_old );    # Range of old line
6134     my $Klast_old_code;                # K of last token if side comment
6135     my $Kmax = @{$rLL} - 1;
6136
6137     my $CODE_type = EMPTY_STRING;
6138     my $line_type = EMPTY_STRING;
6139
6140     # Set the whitespace flags, which indicate the token spacing preference.
6141     my $rwhitespace_flags = $self->set_whitespace_flags();
6142
6143     # we will be setting token lengths as we go
6144     my $cumulative_length = 0;
6145
6146     my %seqno_stack;
6147     my %K_old_opening_by_seqno = ();    # Note: old K index
6148     my $depth_next             = 0;
6149     my $depth_next_max         = 0;
6150
6151     # Note that $K_opening_container and $K_closing_container have values
6152     # defined in sub get_line() for the previous K indexes.  They were needed
6153     # in case option 'indent-only' was set, and we didn't get here. We no longer
6154     # need those and will eliminate them now to avoid any possible mixing of
6155     # old and new values.
6156     my $K_opening_container = $self->[_K_opening_container_] = {};
6157     my $K_closing_container = $self->[_K_closing_container_] = {};
6158
6159     my $K_closing_ternary         = $self->[_K_closing_ternary_];
6160     my $K_opening_ternary         = $self->[_K_opening_ternary_];
6161     my $rK_phantom_semicolons     = $self->[_rK_phantom_semicolons_];
6162     my $rchildren_of_seqno        = $self->[_rchildren_of_seqno_];
6163     my $rhas_broken_code_block    = $self->[_rhas_broken_code_block_];
6164     my $rhas_broken_list          = $self->[_rhas_broken_list_];
6165     my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
6166     my $rhas_code_block           = $self->[_rhas_code_block_];
6167     my $rhas_list                 = $self->[_rhas_list_];
6168     my $rhas_ternary              = $self->[_rhas_ternary_];
6169     my $ris_assigned_structure    = $self->[_ris_assigned_structure_];
6170     my $ris_broken_container      = $self->[_ris_broken_container_];
6171     my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
6172     my $ris_list_by_seqno         = $self->[_ris_list_by_seqno_];
6173     my $ris_permanently_broken    = $self->[_ris_permanently_broken_];
6174     my $rlec_count_by_seqno       = $self->[_rlec_count_by_seqno_];
6175     my $roverride_cab3            = $self->[_roverride_cab3_];
6176     my $rparent_of_seqno          = $self->[_rparent_of_seqno_];
6177     my $rtype_count_by_seqno      = $self->[_rtype_count_by_seqno_];
6178     my $rblock_type_of_seqno      = $self->[_rblock_type_of_seqno_];
6179
6180     my $last_nonblank_code_type       = ';';
6181     my $last_nonblank_code_token      = ';';
6182     my $last_nonblank_block_type      = EMPTY_STRING;
6183     my $last_last_nonblank_code_type  = ';';
6184     my $last_last_nonblank_code_token = ';';
6185
6186     my %K_first_here_doc_by_seqno;
6187
6188     my $set_permanently_broken = sub {
6189         my ($seqno) = @_;
6190         while ( defined($seqno) ) {
6191             $ris_permanently_broken->{$seqno} = 1;
6192             $seqno = $rparent_of_seqno->{$seqno};
6193         }
6194         return;
6195     };
6196     my $store_token = sub {
6197         my ($item) = @_;
6198
6199         # This will be the index of this item in the new array
6200         my $KK_new = @{$rLL_new};
6201
6202         #------------------------------------------------------------------
6203         # NOTE: called once per token so coding efficiency is critical here
6204         #------------------------------------------------------------------
6205
6206         my $type       = $item->[_TYPE_];
6207         my $is_blank   = $type eq 'b';
6208         my $block_type = EMPTY_STRING;
6209
6210         # Do not output consecutive blanks. This situation should have been
6211         # prevented earlier, but it is worth checking because later routines
6212         # make this assumption.
6213         if ( $is_blank && $KK_new && $rLL_new->[-1]->[_TYPE_] eq 'b' ) {
6214             return;
6215         }
6216
6217         # check for a sequenced item (i.e., container or ?/:)
6218         my $type_sequence = $item->[_TYPE_SEQUENCE_];
6219         my $token         = $item->[_TOKEN_];
6220         if ($type_sequence) {
6221
6222             if ( $is_opening_token{$token} ) {
6223
6224                 $K_opening_container->{$type_sequence} = $KK_new;
6225                 $block_type = $rblock_type_of_seqno->{$type_sequence};
6226
6227                 # Fix for case b1100: Count a line ending in ', [' as having
6228                 # a line-ending comma.  Otherwise, these commas can be hidden
6229                 # with something like --opening-square-bracket-right
6230                 if (   $last_nonblank_code_type eq ','
6231                     && $Ktoken_vars == $Klast_old_code
6232                     && $Ktoken_vars > $Kfirst_old )
6233                 {
6234                     $rlec_count_by_seqno->{$type_sequence}++;
6235                 }
6236
6237                 if (   $last_nonblank_code_type eq '='
6238                     || $last_nonblank_code_type eq '=>' )
6239                 {
6240                     $ris_assigned_structure->{$type_sequence} =
6241                       $last_nonblank_code_type;
6242                 }
6243
6244                 my $seqno_parent = $seqno_stack{ $depth_next - 1 };
6245                 $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
6246                 push @{ $rchildren_of_seqno->{$seqno_parent} }, $type_sequence;
6247                 $rparent_of_seqno->{$type_sequence}     = $seqno_parent;
6248                 $seqno_stack{$depth_next}               = $type_sequence;
6249                 $K_old_opening_by_seqno{$type_sequence} = $Ktoken_vars;
6250                 $depth_next++;
6251
6252                 if ( $depth_next > $depth_next_max ) {
6253                     $depth_next_max = $depth_next;
6254                 }
6255             }
6256             elsif ( $is_closing_token{$token} ) {
6257
6258                 $K_closing_container->{$type_sequence} = $KK_new;
6259                 $block_type = $rblock_type_of_seqno->{$type_sequence};
6260
6261                 # Do not include terminal commas in counts
6262                 if (   $last_nonblank_code_type eq ','
6263                     || $last_nonblank_code_type eq '=>' )
6264                 {
6265                     my $seqno = $seqno_stack{ $depth_next - 1 };
6266                     if ($seqno) {
6267                         $rtype_count_by_seqno->{$seqno}
6268                           ->{$last_nonblank_code_type}--;
6269
6270                         if (   $Ktoken_vars == $Kfirst_old
6271                             && $last_nonblank_code_type eq ','
6272                             && $rlec_count_by_seqno->{$seqno} )
6273                         {
6274                             $rlec_count_by_seqno->{$seqno}--;
6275                         }
6276                     }
6277                 }
6278
6279                 # Update the stack...
6280                 $depth_next--;
6281             }
6282             else {
6283
6284                 # For ternary, note parent but do not include as child
6285                 my $seqno_parent = $seqno_stack{ $depth_next - 1 };
6286                 $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
6287                 $rparent_of_seqno->{$type_sequence} = $seqno_parent;
6288
6289                 # These are not yet used but could be useful
6290                 if ( $token eq '?' ) {
6291                     $K_opening_ternary->{$type_sequence} = $KK_new;
6292                 }
6293                 elsif ( $token eq ':' ) {
6294                     $K_closing_ternary->{$type_sequence} = $KK_new;
6295                 }
6296                 else {
6297
6298                     # We really shouldn't arrive here, just being cautious:
6299                     # The only sequenced types output by the tokenizer are the
6300                     # opening & closing containers and the ternary types. Each
6301                     # of those was checked above. So we would only get here
6302                     # if the tokenizer has been changed to mark some other
6303                     # tokens with sequence numbers.
6304                     if (DEVEL_MODE) {
6305                         Fault(
6306 "Unexpected token type with sequence number: type='$type', seqno='$type_sequence'"
6307                         );
6308                     }
6309                 }
6310             }
6311         }
6312
6313         # Find the length of this token.  Later it may be adjusted if phantom
6314         # or ignoring side comment lengths.
6315         my $token_length =
6316             $is_encoded_data
6317           ? $length_function->($token)
6318           : length($token);
6319
6320         # handle comments
6321         my $is_comment = $type eq '#';
6322         if ($is_comment) {
6323
6324             # trim comments if necessary
6325             my $ord = ord( substr( $token, -1, 1 ) );
6326             if (
6327                 $ord > 0
6328                 && (   $ord < ORD_PRINTABLE_MIN
6329                     || $ord > ORD_PRINTABLE_MAX )
6330                 && $token =~ s/\s+$//
6331               )
6332             {
6333                 $token_length = $length_function->($token);
6334                 $item->[_TOKEN_] = $token;
6335             }
6336
6337             # Mark length of side comments as just 1 if sc lengths are ignored
6338             if ( $rOpts_ignore_side_comment_lengths
6339                 && ( !$CODE_type || $CODE_type eq 'HSC' ) )
6340             {
6341                 $token_length = 1;
6342             }
6343             my $seqno = $seqno_stack{ $depth_next - 1 };
6344             if ( defined($seqno)
6345                 && !$ris_permanently_broken->{$seqno} )
6346             {
6347                 $set_permanently_broken->($seqno);
6348             }
6349         }
6350
6351         $item->[_TOKEN_LENGTH_] = $token_length;
6352
6353         # and update the cumulative length
6354         $cumulative_length += $token_length;
6355
6356         # Save the length sum to just AFTER this token
6357         $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
6358
6359         if ( !$is_blank && !$is_comment ) {
6360
6361             # Remember the most recent two non-blank, non-comment tokens.
6362             # NOTE: the phantom semicolon code may change the output stack
6363             # without updating these values.  Phantom semicolons are considered
6364             # the same as blanks for now, but future needs might change that.
6365             # See the related note in sub '$add_phantom_semicolon'.
6366             $last_last_nonblank_code_type  = $last_nonblank_code_type;
6367             $last_last_nonblank_code_token = $last_nonblank_code_token;
6368
6369             $last_nonblank_code_type  = $type;
6370             $last_nonblank_code_token = $token;
6371             $last_nonblank_block_type = $block_type;
6372
6373             # count selected types
6374             if ( $is_counted_type{$type} ) {
6375                 my $seqno = $seqno_stack{ $depth_next - 1 };
6376                 if ( defined($seqno) ) {
6377                     $rtype_count_by_seqno->{$seqno}->{$type}++;
6378
6379                     # Count line-ending commas for -bbx
6380                     if ( $type eq ',' && $Ktoken_vars == $Klast_old_code ) {
6381                         $rlec_count_by_seqno->{$seqno}++;
6382                     }
6383
6384                     # Remember index of first here doc target
6385                     if ( $type eq 'h' && !$K_first_here_doc_by_seqno{$seqno} ) {
6386                         $K_first_here_doc_by_seqno{$seqno} = $KK_new;
6387                     }
6388                 }
6389             }
6390         }
6391
6392         # For reference, here is how to get the parent sequence number.
6393         # This is not used because it is slower than finding it on the fly
6394         # in sub parent_seqno_by_K:
6395
6396         # my $seqno_parent =
6397         #     $type_sequence && $is_opening_token{$token}
6398         #   ? $seqno_stack{ $depth_next - 2 }
6399         #   : $seqno_stack{ $depth_next - 1 };
6400         # my $KK = @{$rLL_new};
6401         # $rseqno_of_parent_by_K->{$KK} = $seqno_parent;
6402
6403         # and finally, add this item to the new array
6404         push @{$rLL_new}, $item;
6405         return;
6406     };
6407
6408     my $store_token_and_space = sub {
6409         my ( $item, $want_space ) = @_;
6410
6411         # store a token with preceding space if requested and needed
6412
6413         # First store the space
6414         if (   $want_space
6415             && @{$rLL_new}
6416             && $rLL_new->[-1]->[_TYPE_] ne 'b'
6417             && $rOpts_add_whitespace )
6418         {
6419             my $rcopy = [ @{$item} ];
6420             $rcopy->[_TYPE_]          = 'b';
6421             $rcopy->[_TOKEN_]         = SPACE;
6422             $rcopy->[_TYPE_SEQUENCE_] = EMPTY_STRING;
6423
6424             $rcopy->[_LINE_INDEX_] =
6425               $rLL_new->[-1]->[_LINE_INDEX_];
6426
6427             # Patch 23-Jan-2021 to fix -lp blinkers:
6428             # The level and ci_level of newly created spaces should be the same
6429             # as the previous token.  Otherwise the coding for the -lp option
6430             # can create a blinking state in some rare cases.
6431             $rcopy->[_LEVEL_] =
6432               $rLL_new->[-1]->[_LEVEL_];
6433             $rcopy->[_CI_LEVEL_] =
6434               $rLL_new->[-1]->[_CI_LEVEL_];
6435
6436             $store_token->($rcopy);
6437         }
6438
6439         # then the token
6440         $store_token->($item);
6441         return;
6442     };
6443
6444     my $add_phantom_semicolon = sub {
6445
6446         my ($KK) = @_;
6447
6448         my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
6449         return unless ( defined($Kp) );
6450
6451         # we are only adding semicolons for certain block types
6452         my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
6453         return unless ($type_sequence);
6454         my $block_type = $rblock_type_of_seqno->{$type_sequence};
6455         return unless ($block_type);
6456         return
6457           unless ( $ok_to_add_semicolon_for_block_type{$block_type}
6458             || $block_type =~ /^(sub|package)/
6459             || $block_type =~ /^\w+\:$/ );
6460
6461         my $type_p          = $rLL_new->[$Kp]->[_TYPE_];
6462         my $token_p         = $rLL_new->[$Kp]->[_TOKEN_];
6463         my $type_sequence_p = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
6464
6465         # Do not add a semicolon if...
6466         return
6467           if (
6468
6469             # it would follow a comment (and be isolated)
6470             $type_p eq '#'
6471
6472             # it follows a code block ( because they are not always wanted
6473             # there and may add clutter)
6474             || $type_sequence_p && $rblock_type_of_seqno->{$type_sequence_p}
6475
6476             # it would follow a label
6477             || $type_p eq 'J'
6478
6479             # it would be inside a 'format' statement (and cause syntax error)
6480             || (   $type_p eq 'k'
6481                 && $token_p =~ /format/ )
6482
6483           );
6484
6485         # Do not add a semicolon if it would impede a weld with an immediately
6486         # following closing token...like this
6487         #   { ( some code ) }
6488         #                  ^--No semicolon can go here
6489
6490         # look at the previous token... note use of the _NEW rLL array here,
6491         # but sequence numbers are invariant.
6492         my $seqno_inner = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
6493
6494         # If it is also a CLOSING token we have to look closer...
6495         if (
6496                $seqno_inner
6497             && $is_closing_token{$token_p}
6498
6499             # we only need to look if there is just one inner container..
6500             && defined( $rchildren_of_seqno->{$type_sequence} )
6501             && @{ $rchildren_of_seqno->{$type_sequence} } == 1
6502           )
6503         {
6504
6505             # Go back and see if the corresponding two OPENING tokens are also
6506             # together.  Note that we are using the OLD K indexing here:
6507             my $K_outer_opening = $K_old_opening_by_seqno{$type_sequence};
6508             if ( defined($K_outer_opening) ) {
6509                 my $K_nxt = $self->K_next_nonblank($K_outer_opening);
6510                 if ( defined($K_nxt) ) {
6511                     my $seqno_nxt = $rLL->[$K_nxt]->[_TYPE_SEQUENCE_];
6512
6513                     # Is the next token after the outer opening the same as
6514                     # our inner closing (i.e. same sequence number)?
6515                     # If so, do not insert a semicolon here.
6516                     return if ( $seqno_nxt && $seqno_nxt == $seqno_inner );
6517                 }
6518             }
6519         }
6520
6521         # We will insert an empty semicolon here as a placeholder.  Later, if
6522         # it becomes the last token on a line, we will bring it to life.  The
6523         # advantage of doing this is that (1) we just have to check line
6524         # endings, and (2) the phantom semicolon has zero width and therefore
6525         # won't cause needless breaks of one-line blocks.
6526         my $Ktop = -1;
6527         if (   $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
6528             && $want_left_space{';'} == WS_NO )
6529         {
6530
6531             # convert the blank into a semicolon..
6532             # be careful: we are working on the new stack top
6533             # on a token which has been stored.
6534             my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', SPACE );
6535
6536             # Convert the existing blank to:
6537             #   a phantom semicolon for one_line_block option = 0 or 1
6538             #   a real semicolon    for one_line_block option = 2
6539             my $tok     = EMPTY_STRING;
6540             my $len_tok = 0;
6541             if ( $rOpts_one_line_block_semicolons == 2 ) {
6542                 $tok     = ';';
6543                 $len_tok = 1;
6544             }
6545
6546             $rLL_new->[$Ktop]->[_TOKEN_]        = $tok;
6547             $rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok;
6548             $rLL_new->[$Ktop]->[_TYPE_]         = ';';
6549
6550             # NOTE: we are changing the output stack without updating variables
6551             # $last_nonblank_code_type, etc. Future needs might require that
6552             # those variables be updated here.  For now, it seems ok to skip
6553             # this.
6554
6555             # Save list of new K indexes of phantom semicolons.
6556             # This will be needed if we want to undo them for iterations in
6557             # future coding.
6558             push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
6559
6560             # Then store a new blank
6561             $store_token->($rcopy);
6562         }
6563         else {
6564
6565             # Patch for issue c078: keep line indexes in order.  If the top
6566             # token is a space that we are keeping (due to '-wls=';') then
6567             # we have to check that old line indexes stay in order.
6568             # In very rare
6569             # instances in which side comments have been deleted and converted
6570             # into blanks, we may have filtered down multiple blanks into just
6571             # one. In that case the top blank may have a higher line number
6572             # than the previous nonblank token. Although the line indexes of
6573             # blanks are not really significant, we need to keep them in order
6574             # in order to pass error checks.
6575             if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' ) {
6576                 my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_];
6577                 my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_];
6578                 if ( $new_top_ix < $old_top_ix ) {
6579                     $rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix;
6580                 }
6581             }
6582
6583             my $rcopy =
6584               copy_token_as_type( $rLL_new->[$Kp], ';', EMPTY_STRING );
6585             $store_token->($rcopy);
6586             push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
6587         }
6588         return;
6589     };
6590
6591     my $check_Q = sub {
6592
6593         # Check that a quote looks okay
6594         # This sub works but needs to by sync'd with the log file output
6595         # before it can be used.
6596         my ( $KK, $Kfirst, $line_number ) = @_;
6597         my $token = $rLL->[$KK]->[_TOKEN_];
6598         $self->note_embedded_tab($line_number) if ( $token =~ "\t" );
6599
6600         # The remainder of this routine looks for something like
6601         #        '$var = s/xxx/yyy/;'
6602         # in case it should have been '$var =~ s/xxx/yyy/;'
6603
6604         # Start by looking for a token beginning with one of: s y m / tr
6605         return
6606           unless ( $is_s_y_m_slash{ substr( $token, 0, 1 ) }
6607             || substr( $token, 0, 2 ) eq 'tr' );
6608
6609         # ... and preceded by one of: = == !=
6610         my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
6611         return unless ( defined($Kp) );
6612         my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
6613         return unless ( $is_unexpected_equals{$previous_nonblank_type} );
6614         my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
6615
6616         my $previous_nonblank_type_2  = 'b';
6617         my $previous_nonblank_token_2 = EMPTY_STRING;
6618         my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
6619         if ( defined($Kpp) ) {
6620             $previous_nonblank_type_2  = $rLL_new->[$Kpp]->[_TYPE_];
6621             $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
6622         }
6623
6624         my $next_nonblank_token = EMPTY_STRING;
6625         my $Kn                  = $KK + 1;
6626         if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
6627         if ( $Kn <= $Kmax ) {
6628             $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
6629         }
6630
6631         my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
6632         my $type_0  = $rLL->[$Kfirst]->[_TYPE_];
6633
6634         if (
6635             ##$token =~ /^(s|tr|y|m|\/)/
6636             ##&& $previous_nonblank_token =~ /^(=|==|!=)$/
6637             1
6638
6639             # preceded by simple scalar
6640             && $previous_nonblank_type_2 eq 'i'
6641             && $previous_nonblank_token_2 =~ /^\$/
6642
6643             # followed by some kind of termination
6644             # (but give complaint if we can not see far enough ahead)
6645             && $next_nonblank_token =~ /^[; \)\}]$/
6646
6647             # scalar is not declared
6648             ##                      =~ /^(my|our|local)$/
6649             && !( $type_0 eq 'k' && $is_my_our_local{$token_0} )
6650           )
6651         {
6652             my $lno   = 1 + $rLL_new->[$Kp]->[_LINE_INDEX_];
6653             my $guess = substr( $previous_nonblank_token, 0, 1 ) . '~';
6654             complain(
6655 "Line $lno: Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
6656             );
6657         }
6658         return;
6659     };
6660
6661     #-------------------------------------------
6662     # Main loop to respace all lines of the file
6663     #-------------------------------------------
6664     my $last_K_out;
6665
6666     foreach my $line_of_tokens ( @{$rlines} ) {
6667
6668         my $input_line_number = $line_of_tokens->{_line_number};
6669         my $last_line_type    = $line_type;
6670         $line_type = $line_of_tokens->{_line_type};
6671         next unless ( $line_type eq 'CODE' );
6672         my $last_CODE_type = $CODE_type;
6673         $CODE_type = $line_of_tokens->{_code_type};
6674         my $rK_range = $line_of_tokens->{_rK_range};
6675         my ( $Kfirst, $Klast ) = @{$rK_range};
6676         next unless defined($Kfirst);
6677         ( $Kfirst_old, $Klast_old ) = ( $Kfirst, $Klast );
6678         $Klast_old_code = $Klast_old;
6679
6680         # Be sure an old K value is defined for sub $store_token
6681         $Ktoken_vars = $Kfirst;
6682
6683         # Check for correct sequence of token indexes...
6684         # An error here means that sub write_line() did not correctly
6685         # package the tokenized lines as it received them.  If we
6686         # get a fault here it has not output a continuous sequence
6687         # of K values.  Or a line of CODE may have been mis-marked as
6688         # something else.  There is no good way to continue after such an
6689         # error.
6690         # FIXME: Calling Fault will produce zero output; it would be best to
6691         # find a way to dump the input file.
6692         if ( defined($last_K_out) ) {
6693             if ( $Kfirst != $last_K_out + 1 ) {
6694                 Fault(
6695                     "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
6696                 );
6697             }
6698         }
6699         else {
6700
6701             # The first token should always have been given index 0 by sub
6702             # write_line()
6703             if ( $Kfirst != 0 ) {
6704                 Fault("Program Bug: first K is $Kfirst but should be 0");
6705             }
6706         }
6707         $last_K_out = $Klast;
6708
6709         # Handle special lines of code
6710         if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
6711
6712             # CODE_types are as follows.
6713             # 'BL' = Blank Line
6714             # 'VB' = Verbatim - line goes out verbatim
6715             # 'FS' = Format Skipping - line goes out verbatim, no blanks
6716             # 'IO' = Indent Only - only indentation may be changed
6717             # 'NIN' = No Internal Newlines - line does not get broken
6718             # 'HSC'=Hanging Side Comment - fix this hanging side comment
6719             # 'BC'=Block Comment - an ordinary full line comment
6720             # 'SBC'=Static Block Comment - a block comment which does not get
6721             #      indented
6722             # 'SBCX'=Static Block Comment Without Leading Space
6723             # 'VER'=VERSION statement
6724             # '' or (undefined) - no restructions
6725
6726             # For a hanging side comment we insert an empty quote before
6727             # the comment so that it becomes a normal side comment and
6728             # will be aligned by the vertical aligner
6729             if ( $CODE_type eq 'HSC' ) {
6730
6731                 # Safety Check: This must be a line with one token (a comment)
6732                 my $rvars_Kfirst = $rLL->[$Kfirst];
6733                 if ( $Kfirst == $Klast && $rvars_Kfirst->[_TYPE_] eq '#' ) {
6734
6735                     # Note that even if the flag 'noadd-whitespace' is set, we
6736                     # will make an exception here and allow a blank to be
6737                     # inserted to push the comment to the right.  We can think
6738                     # of this as an adjustment of indentation rather than
6739                     # whitespace between tokens. This will also prevent the
6740                     # hanging side comment from getting converted to a block
6741                     # comment if whitespace gets deleted, as for example with
6742                     # the -extrude and -mangle options.
6743                     my $rcopy =
6744                       copy_token_as_type( $rvars_Kfirst, 'q', EMPTY_STRING );
6745                     $store_token->($rcopy);
6746                     $rcopy = copy_token_as_type( $rvars_Kfirst, 'b', SPACE );
6747                     $store_token->($rcopy);
6748                     $store_token->($rvars_Kfirst);
6749                     next;
6750                 }
6751                 else {
6752
6753                     # This line was mis-marked by sub scan_comment.  Catch in
6754                     # DEVEL_MODE, otherwise try to repair and keep going.
6755                     Fault(
6756                         "Program bug. A hanging side comment has been mismarked"
6757                     ) if (DEVEL_MODE);
6758
6759                     $CODE_type = EMPTY_STRING;
6760                     $line_of_tokens->{_code_type} = $CODE_type;
6761                 }
6762             }
6763
6764             if ( $CODE_type eq 'BL' ) {
6765                 my $seqno = $seqno_stack{ $depth_next - 1 };
6766                 if (   defined($seqno)
6767                     && !$ris_permanently_broken->{$seqno}
6768                     && $rOpts_maximum_consecutive_blank_lines )
6769                 {
6770                     $set_permanently_broken->($seqno);
6771                 }
6772             }
6773
6774             # Copy tokens unchanged
6775             foreach my $KK ( $Kfirst .. $Klast ) {
6776                 $Ktoken_vars = $KK;
6777                 $store_token->( $rLL->[$KK] );
6778             }
6779             next;
6780         }
6781
6782         # Handle normal line..
6783
6784         # Define index of last token before any side comment for comma counts
6785         my $type_end = $rLL->[$Klast_old_code]->[_TYPE_];
6786         if ( ( $type_end eq '#' || $type_end eq 'b' )
6787             && $Klast_old_code > $Kfirst_old )
6788         {
6789             $Klast_old_code--;
6790             if (   $rLL->[$Klast_old_code]->[_TYPE_] eq 'b'
6791                 && $Klast_old_code > $Kfirst_old )
6792             {
6793                 $Klast_old_code--;
6794             }
6795         }
6796
6797         # Insert any essential whitespace between lines
6798         # if last line was normal CODE.
6799         # Patch for rt #125012: use K_previous_code rather than '_nonblank'
6800         # because comments may disappear.
6801         if ( $last_line_type eq 'CODE' ) {
6802             my $type_next  = $rLL->[$Kfirst]->[_TYPE_];
6803             my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
6804             if (
6805                 is_essential_whitespace(
6806                     $last_last_nonblank_code_token,
6807                     $last_last_nonblank_code_type,
6808                     $last_nonblank_code_token,
6809                     $last_nonblank_code_type,
6810                     $token_next,
6811                     $type_next,
6812                 )
6813               )
6814             {
6815
6816                 # Copy this first token as blank, but use previous line number
6817                 my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', SPACE );
6818                 $rcopy->[_LINE_INDEX_] =
6819                   $rLL_new->[-1]->[_LINE_INDEX_];
6820
6821                 # The level and ci_level of newly created spaces should be the
6822                 # same as the previous token. Otherwise blinking states can
6823                 # be created if the -lp mode is used. See similar coding in
6824                 # sub 'store_token_and_space'.  Fixes cases b1109 b1110.
6825                 $rcopy->[_LEVEL_] =
6826                   $rLL_new->[-1]->[_LEVEL_];
6827                 $rcopy->[_CI_LEVEL_] =
6828                   $rLL_new->[-1]->[_CI_LEVEL_];
6829
6830                 $store_token->($rcopy);
6831             }
6832         }
6833
6834         #-------------------------------------------------------
6835         # Loop to copy all tokens on this line, with any changes
6836         #-------------------------------------------------------
6837         my $type_sequence;
6838         foreach my $KK ( $Kfirst .. $Klast ) {
6839             $Ktoken_vars = $KK;
6840             $rtoken_vars = $rLL->[$KK];
6841             my $token              = $rtoken_vars->[_TOKEN_];
6842             my $type               = $rtoken_vars->[_TYPE_];
6843             my $last_type_sequence = $type_sequence;
6844             $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
6845
6846             # Handle a blank space ...
6847             if ( $type eq 'b' ) {
6848
6849                 # Delete it if not wanted by whitespace rules
6850                 # or we are deleting all whitespace
6851                 # Note that whitespace flag is a flag indicating whether a
6852                 # white space BEFORE the token is needed
6853                 next if ( $KK >= $Klast );    # skip terminal blank
6854                 my $Knext = $KK + 1;
6855
6856                 if ($rOpts_freeze_whitespace) {
6857                     $store_token->($rtoken_vars);
6858                     next;
6859                 }
6860
6861                 my $ws = $rwhitespace_flags->[$Knext];
6862                 if (   $ws == -1
6863                     || $rOpts_delete_old_whitespace )
6864                 {
6865
6866                     my $token_next = $rLL->[$Knext]->[_TOKEN_];
6867                     my $type_next  = $rLL->[$Knext]->[_TYPE_];
6868
6869                     my $do_not_delete = is_essential_whitespace(
6870                         $last_last_nonblank_code_token,
6871                         $last_last_nonblank_code_type,
6872                         $last_nonblank_code_token,
6873                         $last_nonblank_code_type,
6874                         $token_next,
6875                         $type_next,
6876                     );
6877
6878                     # Note that repeated blanks will get filtered out here
6879                     next unless ($do_not_delete);
6880                 }
6881
6882                 # make it just one character
6883                 $rtoken_vars->[_TOKEN_] = SPACE;
6884                 $store_token->($rtoken_vars);
6885                 next;
6886             }
6887
6888             # Handle a nonblank token...
6889
6890             if ($type_sequence) {
6891
6892                 # Insert a tentative missing semicolon if the next token is
6893                 # a closing block brace
6894                 if (
6895                        $type eq '}'
6896                     && $token eq '}'
6897
6898                     # not preceded by a ';'
6899                     && $last_nonblank_code_type ne ';'
6900
6901                     # and this is not a VERSION stmt (is all one line, we
6902                     # are not inserting semicolons on one-line blocks)
6903                     && $CODE_type ne 'VER'
6904
6905                     # and we are allowed to add semicolons
6906                     && $rOpts->{'add-semicolons'}
6907                   )
6908                 {
6909                     $add_phantom_semicolon->($KK);
6910                 }
6911             }
6912
6913             # Modify certain tokens here for whitespace
6914             # The following is not yet done, but could be:
6915             #   sub (x x x)
6916             #     ( $type =~ /^[wit]$/ )
6917             elsif ( $is_wit{$type} ) {
6918
6919                 # change '$  var'  to '$var' etc
6920                 # change '@    '   to '@'
6921                 # Examples: <<snippets/space1.in>>
6922                 my $ord = ord( substr( $token, 1, 1 ) );
6923                 if (
6924
6925                     # quick test for possible blank at second char
6926                     $ord > 0 && ( $ord < ORD_PRINTABLE_MIN
6927                         || $ord > ORD_PRINTABLE_MAX )
6928                   )
6929                 {
6930                     my ( $sigil, $word ) = split /\s+/, $token, 2;
6931
6932                     # $sigil =~ /^[\$\&\%\*\@]$/ )
6933                     if ( $is_sigil{$sigil} ) {
6934                         $token = $sigil;
6935                         $token .= $word if ( defined($word) );    # fix c104
6936                         $rtoken_vars->[_TOKEN_] = $token;
6937                     }
6938                 }
6939
6940                 # Split identifiers with leading arrows, inserting blanks
6941                 # if necessary.  It is easier and safer here than in the
6942                 # tokenizer.  For example '->new' becomes two tokens, '->'
6943                 # and 'new' with a possible blank between.
6944                 #
6945                 # Note: there is a related patch in sub set_whitespace_flags
6946                 elsif (length($token) > 2
6947                     && substr( $token, 0, 2 ) eq '->'
6948                     && $token =~ /^\-\>(.*)$/
6949                     && $1 )
6950                 {
6951
6952                     my $token_save = $1;
6953                     my $type_save  = $type;
6954
6955                     # Change '-> new'  to '->new'
6956                     $token_save =~ s/^\s+//g;
6957
6958                     # store a blank to left of arrow if necessary
6959                     my $Kprev = $self->K_previous_nonblank($KK);
6960                     if (   defined($Kprev)
6961                         && $rLL->[$Kprev]->[_TYPE_] ne 'b'
6962                         && $rOpts_add_whitespace
6963                         && $want_left_space{'->'} == WS_YES )
6964                     {
6965                         my $rcopy =
6966                           copy_token_as_type( $rtoken_vars, 'b', SPACE );
6967                         $store_token->($rcopy);
6968                     }
6969
6970                     # then store the arrow
6971                     my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' );
6972                     $store_token->($rcopy);
6973
6974                     # store a blank after the arrow if requested
6975                     # added for issue git #33
6976                     if ( $want_right_space{'->'} == WS_YES ) {
6977                         my $rcopy_b =
6978                           copy_token_as_type( $rtoken_vars, 'b', SPACE );
6979                         $store_token->($rcopy_b);
6980                     }
6981
6982                     # then reset the current token to be the remainder,
6983                     # and reset the whitespace flag according to the arrow
6984                     $token = $rtoken_vars->[_TOKEN_] = $token_save;
6985                     $type  = $rtoken_vars->[_TYPE_]  = $type_save;
6986                     $store_token->($rtoken_vars);
6987                     next;
6988                 }
6989
6990                 # Trim certain spaces in identifiers
6991                 if ( $type eq 'i' ) {
6992
6993                     if (
6994                         (
6995                             substr( $token, 0, 3 ) eq 'sub'
6996                             || $rOpts_sub_alias_list
6997                         )
6998                         && $token =~ /$SUB_PATTERN/
6999                       )
7000                     {
7001
7002                         # -spp = 0 : no space before opening prototype paren
7003                         # -spp = 1 : stable (follow input spacing)
7004                         # -spp = 2 : always space before opening prototype paren
7005                         my $spp = $rOpts->{'space-prototype-paren'};
7006                         if ( defined($spp) ) {
7007                             if    ( $spp == 0 ) { $token =~ s/\s+\(/\(/; }
7008                             elsif ( $spp == 2 ) { $token =~ s/\(/ (/; }
7009                         }
7010
7011                         # one space max, and no tabs
7012                         $token =~ s/\s+/ /g;
7013                         $rtoken_vars->[_TOKEN_] = $token;
7014                     }
7015
7016                     # clean up spaces in package identifiers, like
7017                     #   "package        Bob::Dog;"
7018                     elsif ( substr( $token, 0, 7 ) eq 'package'
7019                         && $token =~ /^package\s/ )
7020                     {
7021                         $token =~ s/\s+/ /g;
7022                         $rtoken_vars->[_TOKEN_] = $token;
7023                     }
7024
7025                     # trim identifiers of trailing blanks which can occur
7026                     # under some unusual circumstances, such as if the
7027                     # identifier 'witch' has trailing blanks on input here:
7028                     #
7029                     # sub
7030                     # witch
7031                     # ()   # prototype may be on new line ...
7032                     # ...
7033                     my $ord_ch = ord( substr( $token, -1, 1 ) );
7034                     if (
7035
7036                         # quick check for possible ending space
7037                         $ord_ch > 0 && ( $ord_ch < ORD_PRINTABLE_MIN
7038                             || $ord_ch > ORD_PRINTABLE_MAX )
7039                       )
7040                     {
7041                         $token =~ s/\s+$//g;
7042                         $rtoken_vars->[_TOKEN_] = $token;
7043                     }
7044                 }
7045             }
7046
7047             # handle semicolons
7048             elsif ( $type eq ';' ) {
7049
7050                 # Remove unnecessary semicolons, but not after bare
7051                 # blocks, where it could be unsafe if the brace is
7052                 # mis-tokenized.
7053                 if (
7054                     $rOpts->{'delete-semicolons'}
7055                     && (
7056                         (
7057                                $last_nonblank_block_type
7058                             && $last_nonblank_code_type eq '}'
7059                             && (
7060                                 $is_block_without_semicolon{
7061                                     $last_nonblank_block_type}
7062                                 || $last_nonblank_block_type =~ /$SUB_PATTERN/
7063                                 || $last_nonblank_block_type =~ /^\w+:$/
7064                             )
7065                         )
7066                         || $last_nonblank_code_type eq ';'
7067                     )
7068                   )
7069                 {
7070
7071                     # This looks like a deletable semicolon, but even if a
7072                     # semicolon can be deleted it is not necessarily best to do
7073                     # so.  We apply these additional rules for deletion:
7074                     # - Always ok to delete a ';' at the end of a line
7075                     # - Never delete a ';' before a '#' because it would
7076                     #   promote it to a block comment.
7077                     # - If a semicolon is not at the end of line, then only
7078                     #   delete if it is followed by another semicolon or closing
7079                     #   token.  This includes the comment rule.  It may take
7080                     #   two passes to get to a final state, but it is a little
7081                     #   safer.  For example, keep the first semicolon here:
7082                     #      eval { sub bubba { ok(0) }; ok(0) } || ok(1);
7083                     #   It is not required but adds some clarity.
7084                     my $ok_to_delete = 1;
7085                     if ( $KK < $Klast ) {
7086                         my $Kn = $self->K_next_nonblank($KK);
7087                         if ( defined($Kn) && $Kn <= $Klast ) {
7088                             my $next_nonblank_token_type =
7089                               $rLL->[$Kn]->[_TYPE_];
7090                             $ok_to_delete = $next_nonblank_token_type eq ';'
7091                               || $next_nonblank_token_type eq '}';
7092                         }
7093                     }
7094
7095                     # do not delete only nonblank token in a file
7096                     else {
7097                         my $Kp = $self->K_previous_code( undef, $rLL_new );
7098                         my $Kn = $self->K_next_nonblank($KK);
7099                         $ok_to_delete = defined($Kn) || defined($Kp);
7100                     }
7101
7102                     if ($ok_to_delete) {
7103                         $self->note_deleted_semicolon($input_line_number);
7104                         next;
7105                     }
7106                     else {
7107                         write_logfile_entry("Extra ';'\n");
7108                     }
7109                 }
7110             }
7111
7112             # Old patch to add space to something like "x10".
7113             # Note: This is now done in the Tokenizer, but this code remains
7114             # for reference.
7115             elsif ( $type eq 'n' ) {
7116                 if ( substr( $token, 0, 1 ) eq 'x' && $token =~ /^x\d+/ ) {
7117                     $token =~ s/x/x /;
7118                     $rtoken_vars->[_TOKEN_] = $token;
7119                     if (DEVEL_MODE) {
7120                         Fault(<<EOM);
7121 Near line $input_line_number, Unexpected need to split a token '$token' - this should now be done by the Tokenizer
7122 EOM
7123                     }
7124                 }
7125             }
7126
7127             # check for a qw quote
7128             elsif ( $type eq 'q' ) {
7129
7130                 # trim blanks from right of qw quotes
7131                 # (To avoid trimming qw quotes use -ntqw; the tokenizer handles
7132                 # this)
7133                 $token =~ s/\s*$//;
7134                 $rtoken_vars->[_TOKEN_] = $token;
7135                 $self->note_embedded_tab($input_line_number)
7136                   if ( $token =~ "\t" );
7137                 $store_token_and_space->(
7138                     $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
7139                 );
7140                 next;
7141             } ## end if ( $type eq 'q' )
7142
7143             # change 'LABEL   :'   to 'LABEL:'
7144             elsif ( $type eq 'J' ) {
7145                 $token =~ s/\s+//g;
7146                 $rtoken_vars->[_TOKEN_] = $token;
7147             }
7148
7149             # check a quote for problems
7150             elsif ( $type eq 'Q' ) {
7151                 $check_Q->( $KK, $Kfirst, $input_line_number );
7152             }
7153
7154             # Store this token with possible previous blank
7155             if ( $rwhitespace_flags->[$KK] == WS_YES ) {
7156                 $store_token_and_space->( $rtoken_vars, 1 );
7157             }
7158             else {
7159                 $store_token->($rtoken_vars);
7160             }
7161
7162         }    # End token loop
7163     }    # End line loop
7164
7165     # Walk backwards through the tokens, making forward links to sequence items.
7166     if ( @{$rLL_new} ) {
7167         my $KNEXT;
7168         foreach my $KK ( reverse( 0 .. @{$rLL_new} - 1 ) ) {
7169             $rLL_new->[$KK]->[_KNEXT_SEQ_ITEM_] = $KNEXT;
7170             if ( $rLL_new->[$KK]->[_TYPE_SEQUENCE_] ) { $KNEXT = $KK }
7171         }
7172         $self->[_K_first_seq_item_] = $KNEXT;
7173     }
7174
7175     # Find and remember lists by sequence number
7176     foreach my $seqno ( keys %{$K_opening_container} ) {
7177         my $K_opening = $K_opening_container->{$seqno};
7178         next unless defined($K_opening);
7179
7180         # code errors may leave undefined closing tokens
7181         my $K_closing = $K_closing_container->{$seqno};
7182         next unless defined($K_closing);
7183
7184         my $lx_open   = $rLL_new->[$K_opening]->[_LINE_INDEX_];
7185         my $lx_close  = $rLL_new->[$K_closing]->[_LINE_INDEX_];
7186         my $line_diff = $lx_close - $lx_open;
7187         $ris_broken_container->{$seqno} = $line_diff;
7188
7189         # See if this is a list
7190         my $is_list;
7191         my $rtype_count = $rtype_count_by_seqno->{$seqno};
7192         if ($rtype_count) {
7193             my $comma_count     = $rtype_count->{','};
7194             my $fat_comma_count = $rtype_count->{'=>'};
7195             my $semicolon_count = $rtype_count->{';'} || $rtype_count->{'f'};
7196
7197             # We will define a list to be a container with one or more commas
7198             # and no semicolons. Note that we have included the semicolons
7199             # in a 'for' container in the semicolon count to keep c-style for
7200             # statements from being formatted as lists.
7201             if ( ( $comma_count || $fat_comma_count ) && !$semicolon_count ) {
7202                 $is_list = 1;
7203
7204                 # We need to do one more check for a parenthesized list:
7205                 # At an opening paren following certain tokens, such as 'if',
7206                 # we do not want to format the contents as a list.
7207                 if ( $rLL_new->[$K_opening]->[_TOKEN_] eq '(' ) {
7208                     my $Kp = $self->K_previous_code( $K_opening, $rLL_new );
7209                     if ( defined($Kp) ) {
7210                         my $type_p = $rLL_new->[$Kp]->[_TYPE_];
7211                         if ( $type_p eq 'k' ) {
7212                             my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
7213                             $is_list = 0 if ( $is_nonlist_keyword{$token_p} );
7214                         }
7215                         else {
7216                             $is_list = 0 if ( $is_nonlist_type{$type_p} );
7217                         }
7218                     }
7219                 }
7220             }
7221         }
7222
7223         # Look for a block brace marked as uncertain.  If the tokenizer thinks
7224         # its guess is uncertain for the type of a brace following an unknown
7225         # bareword then it adds a trailing space as a signal.  We can fix the
7226         # type here now that we have had a better look at the contents of the
7227         # container. This fixes case b1085. To find the corresponding code in
7228         # Tokenizer.pm search for 'b1085' with an editor.
7229         my $block_type = $rblock_type_of_seqno->{$seqno};
7230         if ( $block_type && substr( $block_type, -1, 1 ) eq SPACE ) {
7231
7232             # Always remove the trailing space
7233             $block_type =~ s/\s+$//;
7234
7235             # Try to filter out parenless sub calls
7236             my $Knn1 = $self->K_next_nonblank( $K_opening, $rLL_new );
7237             my $Knn2;
7238             if ( defined($Knn1) ) {
7239                 $Knn2 = $self->K_next_nonblank( $Knn1, $rLL_new );
7240             }
7241             my $type_nn1 = defined($Knn1) ? $rLL_new->[$Knn1]->[_TYPE_] : 'b';
7242             my $type_nn2 = defined($Knn2) ? $rLL_new->[$Knn2]->[_TYPE_] : 'b';
7243
7244             #   if ( $type_nn1 =~ /^[wU]$/ && $type_nn2 =~ /^[wiqQGCZ]$/ ) {
7245             if ( $wU{$type_nn1} && $wiq{$type_nn2} ) {
7246                 $is_list = 0;
7247             }
7248
7249             # Convert to a hash brace if it looks like it holds a list
7250             if ($is_list) {
7251
7252                 $block_type = EMPTY_STRING;
7253
7254                 $rLL_new->[$K_opening]->[_CI_LEVEL_] = 1;
7255                 $rLL_new->[$K_closing]->[_CI_LEVEL_] = 1;
7256             }
7257
7258             $rblock_type_of_seqno->{$seqno} = $block_type;
7259         }
7260
7261         # Handle a list container
7262         if ( $is_list && !$block_type ) {
7263             $ris_list_by_seqno->{$seqno} = $seqno;
7264             my $seqno_parent = $rparent_of_seqno->{$seqno};
7265             my $depth        = 0;
7266             while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
7267                 $depth++;
7268
7269                 # for $rhas_list we need to save the minimum depth
7270                 if (  !$rhas_list->{$seqno_parent}
7271                     || $rhas_list->{$seqno_parent} > $depth )
7272                 {
7273                     $rhas_list->{$seqno_parent} = $depth;
7274                 }
7275
7276                 if ($line_diff) {
7277                     $rhas_broken_list->{$seqno_parent} = 1;
7278
7279                     # Patch1: We need to mark broken lists with non-terminal
7280                     # line-ending commas for the -bbx=2 parameter. This insures
7281                     # that the list will stay broken.  Otherwise the flag
7282                     # -bbx=2 can be unstable.  This fixes case b789 and b938.
7283
7284                     # Patch2: Updated to also require either one fat comma or
7285                     # one more line-ending comma.  Fixes cases b1069 b1070
7286                     # b1072 b1076.
7287                     if (
7288                         $rlec_count_by_seqno->{$seqno}
7289                         && (   $rlec_count_by_seqno->{$seqno} > 1
7290                             || $rtype_count_by_seqno->{$seqno}->{'=>'} )
7291                       )
7292                     {
7293                         $rhas_broken_list_with_lec->{$seqno_parent} = 1;
7294                     }
7295                 }
7296                 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
7297             }
7298         }
7299
7300         # Handle code blocks ...
7301         # The -lp option needs to know if a container holds a code block
7302         elsif ( $block_type && $rOpts_line_up_parentheses ) {
7303             my $seqno_parent = $rparent_of_seqno->{$seqno};
7304             while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
7305                 $rhas_code_block->{$seqno_parent}        = 1;
7306                 $rhas_broken_code_block->{$seqno_parent} = $line_diff;
7307                 $seqno_parent = $rparent_of_seqno->{$seqno_parent};
7308             }
7309         }
7310     }
7311
7312     # Find containers with ternaries, needed for -lp formatting.
7313     foreach my $seqno ( keys %{$K_opening_ternary} ) {
7314         my $seqno_parent = $rparent_of_seqno->{$seqno};
7315         while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
7316             $rhas_ternary->{$seqno_parent} = 1;
7317             $seqno_parent = $rparent_of_seqno->{$seqno_parent};
7318         }
7319     }
7320
7321     # Turn off -lp for containers with here-docs with text within a container,
7322     # since they have their own fixed indentation.  Fixes case b1081.
7323     if ($rOpts_line_up_parentheses) {
7324         foreach my $seqno ( keys %K_first_here_doc_by_seqno ) {
7325             my $Kh      = $K_first_here_doc_by_seqno{$seqno};
7326             my $Kc      = $K_closing_container->{$seqno};
7327             my $line_Kh = $rLL_new->[$Kh]->[_LINE_INDEX_];
7328             my $line_Kc = $rLL_new->[$Kc]->[_LINE_INDEX_];
7329             next if ( $line_Kh == $line_Kc );
7330             $ris_excluded_lp_container->{$seqno} = 1;
7331         }
7332     }
7333
7334     # Set a flag to turn off -cab=3 in complex structures.  Otherwise,
7335     # instability can occur.  When it is overridden the behavior of the closest
7336     # match, -cab=2, will be used instead.  This fixes cases b1096 b1113.
7337     if ( $rOpts_comma_arrow_breakpoints == 3 ) {
7338         foreach my $seqno ( keys %{$K_opening_container} ) {
7339
7340             my $rtype_count = $rtype_count_by_seqno->{$seqno};
7341             next unless ( $rtype_count && $rtype_count->{'=>'} );
7342
7343             # override -cab=3 if this contains a sub-list
7344             if ( $rhas_list->{$seqno} ) {
7345                 $roverride_cab3->{$seqno} = 1;
7346             }
7347
7348             # or if this is a sub-list of its parent container
7349             else {
7350                 my $seqno_parent = $rparent_of_seqno->{$seqno};
7351                 if ( defined($seqno_parent)
7352                     && $ris_list_by_seqno->{$seqno_parent} )
7353                 {
7354                     $roverride_cab3->{$seqno} = 1;
7355                 }
7356             }
7357         }
7358     }
7359
7360     # Reset memory to be the new array
7361     $self->[_rLL_] = $rLL_new;
7362     my $Klimit;
7363     if ( @{$rLL_new} ) { $Klimit = @{$rLL_new} - 1 }
7364     $self->[_Klimit_] = $Klimit;
7365
7366     # During development, verify that the new array still looks okay.
7367     DEVEL_MODE && $self->check_token_array();
7368
7369     # reset the token limits of each line
7370     $self->resync_lines_and_tokens();
7371
7372     return;
7373 } ## end sub respace_tokens
7374
7375 sub copy_token_as_type {
7376
7377     # This provides a quick way to create a new token by
7378     # slightly modifying an existing token.
7379     my ( $rold_token, $type, $token ) = @_;
7380     if ( $type eq 'b' ) {
7381         $token = SPACE unless defined($token);
7382     }
7383     elsif ( $type eq 'q' ) {
7384         $token = EMPTY_STRING unless defined($token);
7385     }
7386     elsif ( $type eq '->' ) {
7387         $token = '->' unless defined($token);
7388     }
7389     elsif ( $type eq ';' ) {
7390         $token = ';' unless defined($token);
7391     }
7392     else {
7393
7394         # Unexpected type ... this sub will work as long as both $token and
7395         # $type are defined, but we should catch any unexpected types during
7396         # development.
7397         if (DEVEL_MODE) {
7398             Fault(<<EOM);
7399 sub 'copy_token_as_type' received token type '$type' but expects just one of: 'b' 'q' '->' or ';'
7400 EOM
7401         }
7402         else {
7403             # shouldn't happen
7404         }
7405     }
7406
7407     my @rnew_token = @{$rold_token};
7408     $rnew_token[_TYPE_]          = $type;
7409     $rnew_token[_TOKEN_]         = $token;
7410     $rnew_token[_TYPE_SEQUENCE_] = EMPTY_STRING;
7411     return \@rnew_token;
7412 } ## end sub copy_token_as_type
7413
7414 sub Debug_dump_tokens {
7415
7416     # a debug routine, not normally used
7417     my ( $self, $msg ) = @_;
7418     my $rLL   = $self->[_rLL_];
7419     my $nvars = @{$rLL};
7420     print STDERR "$msg\n";
7421     print STDERR "ntokens=$nvars\n";
7422     print STDERR "K\t_TOKEN_\t_TYPE_\n";
7423     my $K = 0;
7424
7425     foreach my $item ( @{$rLL} ) {
7426         print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n";
7427         $K++;
7428     }
7429     return;
7430 } ## end sub Debug_dump_tokens
7431
7432 sub K_next_code {
7433     my ( $self, $KK, $rLL ) = @_;
7434
7435     # return the index K of the next nonblank, non-comment token
7436     return unless ( defined($KK) && $KK >= 0 );
7437
7438     # use the standard array unless given otherwise
7439     $rLL = $self->[_rLL_] unless ( defined($rLL) );
7440     my $Num  = @{$rLL};
7441     my $Knnb = $KK + 1;
7442     while ( $Knnb < $Num ) {
7443         if ( !defined( $rLL->[$Knnb] ) ) {
7444
7445             # We seem to have encountered a gap in our array.
7446             # This shouldn't happen because sub write_line() pushed
7447             # items into the $rLL array.
7448             Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
7449             return;
7450         }
7451         if (   $rLL->[$Knnb]->[_TYPE_] ne 'b'
7452             && $rLL->[$Knnb]->[_TYPE_] ne '#' )
7453         {
7454             return $Knnb;
7455         }
7456         $Knnb++;
7457     }
7458     return;
7459 } ## end sub K_next_code
7460
7461 sub K_next_nonblank {
7462     my ( $self, $KK, $rLL ) = @_;
7463
7464     # return the index K of the next nonblank token, or
7465     # return undef if none
7466     return unless ( defined($KK) && $KK >= 0 );
7467
7468     # The third arg allows this routine to be used on any array.  This is
7469     # useful in sub respace_tokens when we are copying tokens from an old $rLL
7470     # to a new $rLL array.  But usually the third arg will not be given and we
7471     # will just use the $rLL array in $self.
7472     $rLL = $self->[_rLL_] unless ( defined($rLL) );
7473     my $Num  = @{$rLL};
7474     my $Knnb = $KK + 1;
7475     return unless ( $Knnb < $Num );
7476     return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
7477     return unless ( ++$Knnb < $Num );
7478     return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
7479
7480     # Backup loop. Very unlikely to get here; it means we have neighboring
7481     # blanks in the token stream.
7482     $Knnb++;
7483     while ( $Knnb < $Num ) {
7484
7485         # Safety check, this fault shouldn't happen:  The $rLL array is the
7486         # main array of tokens, so all entries should be used.  It is
7487         # initialized in sub write_line, and then re-initialized by sub
7488         # $store_token() within sub respace_tokens.  Tokens are pushed on
7489         # so there shouldn't be any gaps.
7490         if ( !defined( $rLL->[$Knnb] ) ) {
7491             Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
7492             return;
7493         }
7494         if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
7495         $Knnb++;
7496     }
7497     return;
7498 } ## end sub K_next_nonblank
7499
7500 sub K_previous_code {
7501
7502     # return the index K of the previous nonblank, non-comment token
7503     # Call with $KK=undef to start search at the top of the array
7504     my ( $self, $KK, $rLL ) = @_;
7505
7506     # use the standard array unless given otherwise
7507     $rLL = $self->[_rLL_] unless ( defined($rLL) );
7508     my $Num = @{$rLL};
7509     if    ( !defined($KK) ) { $KK = $Num }
7510     elsif ( $KK > $Num ) {
7511
7512         # This fault can be caused by a programming error in which a bad $KK is
7513         # given.  The caller should make the first call with KK_new=undef to
7514         # avoid this error.
7515         Fault(
7516 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
7517         ) if (DEVEL_MODE);
7518         return;
7519     }
7520     my $Kpnb = $KK - 1;
7521     while ( $Kpnb >= 0 ) {
7522         if (   $rLL->[$Kpnb]->[_TYPE_] ne 'b'
7523             && $rLL->[$Kpnb]->[_TYPE_] ne '#' )
7524         {
7525             return $Kpnb;
7526         }
7527         $Kpnb--;
7528     }
7529     return;
7530 } ## end sub K_previous_code
7531
7532 sub K_previous_nonblank {
7533
7534     # return index of previous nonblank token before item K;
7535     # Call with $KK=undef to start search at the top of the array
7536     my ( $self, $KK, $rLL ) = @_;
7537
7538     # use the standard array unless given otherwise
7539     $rLL = $self->[_rLL_] unless ( defined($rLL) );
7540     my $Num = @{$rLL};
7541     if    ( !defined($KK) ) { $KK = $Num }
7542     elsif ( $KK > $Num ) {
7543
7544         # This fault can be caused by a programming error in which a bad $KK is
7545         # given.  The caller should make the first call with KK_new=undef to
7546         # avoid this error.
7547         Fault(
7548 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
7549         ) if (DEVEL_MODE);
7550         return;
7551     }
7552     my $Kpnb = $KK - 1;
7553     return unless ( $Kpnb >= 0 );
7554     return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
7555     return unless ( --$Kpnb >= 0 );
7556     return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
7557
7558     # Backup loop. We should not get here unless some routine
7559     # slipped repeated blanks into the token stream.
7560     return unless ( --$Kpnb >= 0 );
7561     while ( $Kpnb >= 0 ) {
7562         if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
7563         $Kpnb--;
7564     }
7565     return;
7566 } ## end sub K_previous_nonblank
7567
7568 sub parent_seqno_by_K {
7569
7570     # Return the sequence number of the parent container of token K, if any.
7571
7572     my ( $self, $KK ) = @_;
7573     my $rLL = $self->[_rLL_];
7574
7575     # The task is to jump forward to the next container token
7576     # and use the sequence number of either it or its parent.
7577
7578     # For example, consider the following with seqno=5 of the '[' and ']'
7579     # being called with index K of the first token of each line:
7580
7581     #                                              # result
7582     #    push @tests,                              # -
7583     #      [                                       # -
7584     #        sub { 99 },   'do {&{%s} for 1,2}',   # 5
7585     #        '(&{})(&{})', undef,                  # 5
7586     #        [ 2, 2, 0 ],  0                       # 5
7587     #      ];                                      # -
7588
7589     # NOTE: The ending parent will be SEQ_ROOT for a balanced file.  For
7590     # unbalanced files, last sequence number will either be undefined or it may
7591     # be at a deeper level.  In either case we will just return SEQ_ROOT to
7592     # have a defined value and allow formatting to proceed.
7593     my $parent_seqno  = SEQ_ROOT;
7594     my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
7595     if ($type_sequence) {
7596         $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
7597     }
7598     else {
7599         my $Kt = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
7600         if ( defined($Kt) ) {
7601             $type_sequence = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
7602             my $type = $rLL->[$Kt]->[_TYPE_];
7603
7604             # if next container token is closing, it is the parent seqno
7605             if ( $is_closing_type{$type} ) {
7606                 $parent_seqno = $type_sequence;
7607             }
7608
7609             # otherwise we want its parent container
7610             else {
7611                 $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
7612             }
7613         }
7614     }
7615     $parent_seqno = SEQ_ROOT unless ( defined($parent_seqno) );
7616     return $parent_seqno;
7617 } ## end sub parent_seqno_by_K
7618
7619 sub is_in_block_by_i {
7620     my ( $self, $i ) = @_;
7621
7622     # returns true if
7623     #     token at i is contained in a BLOCK
7624     #     or is at root level
7625     #     or there is some kind of error (i.e. unbalanced file)
7626     # returns false otherwise
7627     return 1 if ( $i < 0 );    # shouldn't happen, bad call
7628     my $seqno = $parent_seqno_to_go[$i];
7629     return 1 if ( !$seqno || $seqno eq SEQ_ROOT );
7630     return 1 if ( $self->[_rblock_type_of_seqno_]->{$seqno} );
7631     return;
7632 } ## end sub is_in_block_by_i
7633
7634 sub is_in_list_by_i {
7635     my ( $self, $i ) = @_;
7636
7637     # returns true if token at i is contained in a LIST
7638     # returns false otherwise
7639     my $seqno = $parent_seqno_to_go[$i];
7640     return unless ( $seqno && $seqno ne SEQ_ROOT );
7641     if ( $self->[_ris_list_by_seqno_]->{$seqno} ) {
7642         return 1;
7643     }
7644     return;
7645 } ## end sub is_in_list_by_i
7646
7647 sub is_list_by_K {
7648
7649     # Return true if token K is in a list
7650     my ( $self, $KK ) = @_;
7651
7652     my $parent_seqno = $self->parent_seqno_by_K($KK);
7653     return unless defined($parent_seqno);
7654     return $self->[_ris_list_by_seqno_]->{$parent_seqno};
7655 }
7656
7657 sub is_list_by_seqno {
7658
7659     # Return true if the immediate contents of a container appears to be a
7660     # list.
7661     my ( $self, $seqno ) = @_;
7662     return unless defined($seqno);
7663     return $self->[_ris_list_by_seqno_]->{$seqno};
7664 }
7665
7666 sub resync_lines_and_tokens {
7667
7668     my $self   = shift;
7669     my $rLL    = $self->[_rLL_];
7670     my $Klimit = $self->[_Klimit_];
7671     my $rlines = $self->[_rlines_];
7672     my @Krange_code_without_comments;
7673     my @Klast_valign_code;
7674
7675     # Re-construct the arrays of tokens associated with the original input lines
7676     # since they have probably changed due to inserting and deleting blanks
7677     # and a few other tokens.
7678
7679     # This is the next token and its line index:
7680     my $Knext = 0;
7681     my $Kmax  = defined($Klimit) ? $Klimit : -1;
7682
7683     # Verify that old line indexes are in still order.  If this error occurs,
7684     # check locations where sub 'respace_tokens' creates new tokens (like
7685     # blank spaces).  It must have set a bad old line index.
7686     if ( DEVEL_MODE && defined($Klimit) ) {
7687         my $iline = $rLL->[0]->[_LINE_INDEX_];
7688         foreach my $KK ( 1 .. $Klimit ) {
7689             my $iline_last = $iline;
7690             $iline = $rLL->[$KK]->[_LINE_INDEX_];
7691             if ( $iline < $iline_last ) {
7692                 my $KK_m    = $KK - 1;
7693                 my $token_m = $rLL->[$KK_m]->[_TOKEN_];
7694                 my $token   = $rLL->[$KK]->[_TOKEN_];
7695                 my $type_m  = $rLL->[$KK_m]->[_TYPE_];
7696                 my $type    = $rLL->[$KK]->[_TYPE_];
7697                 Fault(<<EOM);
7698 Line indexes out of order at index K=$KK:
7699 at KK-1 =$KK_m: old line=$iline_last, type='$type_m', token='$token_m'
7700 at KK   =$KK: old line=$iline, type='$type', token='$token',
7701 EOM
7702             }
7703         }
7704     }
7705
7706     my $iline = -1;
7707     foreach my $line_of_tokens ( @{$rlines} ) {
7708         $iline++;
7709         my $line_type = $line_of_tokens->{_line_type};
7710         if ( $line_type eq 'CODE' ) {
7711
7712             # Get the old number of tokens on this line
7713             my $rK_range_old = $line_of_tokens->{_rK_range};
7714             my ( $Kfirst_old, $Klast_old ) = @{$rK_range_old};
7715             my $Kdiff_old = 0;
7716             if ( defined($Kfirst_old) ) {
7717                 $Kdiff_old = $Klast_old - $Kfirst_old;
7718             }
7719
7720             # Find the range of NEW K indexes for the line:
7721             # $Kfirst = index of first token on line
7722             # $Klast  = index of last token on line
7723             my ( $Kfirst, $Klast );
7724
7725             my $Knext_beg = $Knext;    # this will be $Kfirst if we find tokens
7726
7727             # Optimization: Although the actual K indexes may be completely
7728             # changed after respacing, the number of tokens on any given line
7729             # will often be nearly unchanged.  So we will see if we can start
7730             # our search by guessing that the new line has the same number
7731             # of tokens as the old line.
7732             my $Knext_guess = $Knext + $Kdiff_old;
7733             if (   $Knext_guess > $Knext
7734                 && $Knext_guess < $Kmax
7735                 && $rLL->[$Knext_guess]->[_LINE_INDEX_] <= $iline )
7736             {
7737
7738                 # the guess is good, so we can start our search here
7739                 $Knext = $Knext_guess + 1;
7740             }
7741
7742             while ($Knext <= $Kmax
7743                 && $rLL->[$Knext]->[_LINE_INDEX_] <= $iline )
7744             {
7745                 $Knext++;
7746             }
7747
7748             if ( $Knext > $Knext_beg ) {
7749
7750                 $Klast = $Knext - 1;
7751
7752                 # Delete any terminal blank token
7753                 if ( $rLL->[$Klast]->[_TYPE_] eq 'b' ) { $Klast -= 1 }
7754
7755                 if ( $Klast < $Knext_beg ) {
7756                     $Klast = undef;
7757                 }
7758                 else {
7759
7760                     $Kfirst = $Knext_beg;
7761
7762                     # Save ranges of non-comment code. This will be used by
7763                     # sub keep_old_line_breaks.
7764                     if ( $rLL->[$Kfirst]->[_TYPE_] ne '#' ) {
7765                         push @Krange_code_without_comments, [ $Kfirst, $Klast ];
7766                     }
7767
7768                     # Only save ending K indexes of code types which are blank
7769                     # or 'VER'.  These will be used for a convergence check.
7770                     # See related code in sub 'convey_batch_to_vertical_aligner'
7771                     my $CODE_type = $line_of_tokens->{_code_type};
7772                     if (  !$CODE_type
7773                         || $CODE_type eq 'VER' )
7774                     {
7775                         push @Klast_valign_code, $Klast;
7776                     }
7777                 }
7778             }
7779
7780             # It is only safe to trim the actual line text if the input
7781             # line had a terminal blank token. Otherwise, we may be
7782             # in a quote.
7783             if ( $line_of_tokens->{_ended_in_blank_token} ) {
7784                 $line_of_tokens->{_line_text} =~ s/\s+$//;
7785             }
7786             $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
7787
7788             # Deleting semicolons can create new empty code lines
7789             # which should be marked as blank
7790             if ( !defined($Kfirst) ) {
7791                 my $CODE_type = $line_of_tokens->{_code_type};
7792                 if ( !$CODE_type ) {
7793                     $line_of_tokens->{_code_type} = 'BL';
7794                 }
7795             }
7796         }
7797     }
7798
7799     # There shouldn't be any nodes beyond the last one.  This routine is
7800     # relinking lines and tokens after the tokens have been respaced.  A fault
7801     # here indicates some kind of bug has been introduced into the above loops.
7802     # There is not good way to keep going; we better stop here.
7803     # FIXME: This will produce zero output. it would be best to find a way to
7804     # dump the input file.
7805     if ( $Knext <= $Kmax ) {
7806
7807         Fault("unexpected tokens at end of file when reconstructing lines");
7808     }
7809     $self->[_rKrange_code_without_comments_] = \@Krange_code_without_comments;
7810
7811     # Setup the convergence test in the FileWriter based on line-ending indexes
7812     my $file_writer_object = $self->[_file_writer_object_];
7813     $file_writer_object->setup_convergence_test( \@Klast_valign_code );
7814
7815     # Mark essential old breakpoints if combination -iob -lp is used.  These
7816     # two options do not work well together, but we can avoid turning -iob off
7817     # by ignoring -iob at certain essential line breaks.
7818     # Fixes cases b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058
7819     if ( $rOpts_ignore_old_breakpoints && $rOpts_line_up_parentheses ) {
7820         my %is_assignment_or_fat_comma = %is_assignment;
7821         $is_assignment_or_fat_comma{'=>'} = 1;
7822         my $ris_essential_old_breakpoint =
7823           $self->[_ris_essential_old_breakpoint_];
7824         my ( $Kfirst, $Klast );
7825         foreach my $line_of_tokens ( @{$rlines} ) {
7826             my $line_type = $line_of_tokens->{_line_type};
7827             if ( $line_type ne 'CODE' ) {
7828                 ( $Kfirst, $Klast ) = ( undef, undef );
7829                 next;
7830             }
7831             my ( $Kfirst_prev, $Klast_prev ) = ( $Kfirst, $Klast );
7832             ( $Kfirst, $Klast ) = @{ $line_of_tokens->{_rK_range} };
7833
7834             next unless defined($Klast_prev);
7835             next unless defined($Kfirst);
7836             my $type_last  = $rLL->[$Klast_prev]->[_TOKEN_];
7837             my $type_first = $rLL->[$Kfirst]->[_TOKEN_];
7838             next
7839               unless ( $is_assignment_or_fat_comma{$type_last}
7840                 || $is_assignment_or_fat_comma{$type_first} );
7841             $ris_essential_old_breakpoint->{$Klast_prev} = 1;
7842         }
7843     }
7844     return;
7845 } ## end sub resync_lines_and_tokens
7846
7847 sub keep_old_line_breaks {
7848
7849     # Called once per file to find and mark any old line breaks which
7850     # should be kept.  We will be translating the input hashes into
7851     # token indexes.
7852
7853     # A flag is set as follows:
7854     # = 1 make a hard break (flush the current batch)
7855     #     best for something like leading commas (-kbb=',')
7856     # = 2 make a soft break (keep building current batch)
7857     #     best for something like leading ->
7858
7859     my ($self) = @_;
7860
7861     my $rLL = $self->[_rLL_];
7862     my $rKrange_code_without_comments =
7863       $self->[_rKrange_code_without_comments_];
7864     my $rbreak_before_Kfirst = $self->[_rbreak_before_Kfirst_];
7865     my $rbreak_after_Klast   = $self->[_rbreak_after_Klast_];
7866     my $rwant_container_open = $self->[_rwant_container_open_];
7867     my $K_opening_container  = $self->[_K_opening_container_];
7868     my $ris_broken_container = $self->[_ris_broken_container_];
7869     my $ris_list_by_seqno    = $self->[_ris_list_by_seqno_];
7870
7871     # This code moved here from sub break_lists to fix b1120
7872     if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
7873         foreach my $item ( @{$rKrange_code_without_comments} ) {
7874             my ( $Kfirst, $Klast ) = @{$item};
7875             my $type  = $rLL->[$Kfirst]->[_TYPE_];
7876             my $token = $rLL->[$Kfirst]->[_TOKEN_];
7877
7878             # leading '->' use a value of 2 which causes a soft
7879             # break rather than a hard break
7880             if ( $type eq '->' ) {
7881                 $rbreak_before_Kfirst->{$Kfirst} = 2;
7882             }
7883
7884             # leading ')->' use a special flag to insure that both
7885             # opening and closing parens get opened
7886             # Fix for b1120: only for parens, not braces
7887             elsif ( $token eq ')' ) {
7888                 my $Kn = $self->K_next_nonblank($Kfirst);
7889                 next
7890                   unless ( defined($Kn)
7891                     && $Kn <= $Klast
7892                     && $rLL->[$Kn]->[_TYPE_] eq '->' );
7893                 my $seqno = $rLL->[$Kfirst]->[_TYPE_SEQUENCE_];
7894                 next unless ($seqno);
7895
7896                 # Note: in previous versions there was a fix here to avoid
7897                 # instability between conflicting -bom and -pvt or -pvtc flags.
7898                 # The fix skipped -bom for a small line difference.  But this
7899                 # was troublesome, and instead the fix has been moved to
7900                 # sub set_vertical_tightness_flags where priority is given to
7901                 # the -bom flag over -pvt and -pvtc flags.  Both opening and
7902                 # closing paren flags are involved because even though -bom only
7903                 # requests breaking before the closing paren, automated logic
7904                 # opens the opening paren when the closing paren opens.
7905                 # Relevant cases are b977, b1215, b1270, b1303
7906
7907                 $rwant_container_open->{$seqno} = 1;
7908             }
7909         }
7910     }
7911
7912     return unless ( %keep_break_before_type || %keep_break_after_type );
7913
7914     my $check_for_break = sub {
7915         my ( $KK, $rkeep_break_hash, $rbreak_hash ) = @_;
7916         my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
7917
7918         # non-container tokens use the type as the key
7919         if ( !$seqno ) {
7920             my $type = $rLL->[$KK]->[_TYPE_];
7921             if ( $rkeep_break_hash->{$type} ) {
7922                 $rbreak_hash->{$KK} = 1;
7923             }
7924         }
7925
7926         # container tokens use the token as the key
7927         else {
7928             my $token = $rLL->[$KK]->[_TOKEN_];
7929             my $flag  = $rkeep_break_hash->{$token};
7930             if ($flag) {
7931
7932                 my $match = $flag eq '1' || $flag eq '*';
7933
7934                 # check for special matching codes
7935                 if ( !$match ) {
7936                     if ( $token eq '(' || $token eq ')' ) {
7937                         $match = $self->match_paren_flag( $KK, $flag );
7938                     }
7939                     elsif ( $token eq '{' || $token eq '}' ) {
7940
7941                         # These tentative codes 'b' and 'B' for brace types are
7942                         # placeholders for possible future brace types. They
7943                         # are not documented and may be changed.
7944                         my $block_type =
7945                           $self->[_rblock_type_of_seqno_]->{$seqno};
7946                         if    ( $flag eq 'b' ) { $match = $block_type }
7947                         elsif ( $flag eq 'B' ) { $match = !$block_type }
7948                         else {
7949                             # unknown code - no match
7950                         }
7951                     }
7952                 }
7953                 $rbreak_hash->{$KK} = 1 if ($match);
7954             }
7955         }
7956     };
7957
7958     foreach my $item ( @{$rKrange_code_without_comments} ) {
7959         my ( $Kfirst, $Klast ) = @{$item};
7960         $check_for_break->(
7961             $Kfirst, \%keep_break_before_type, $rbreak_before_Kfirst
7962         );
7963         $check_for_break->(
7964             $Klast, \%keep_break_after_type, $rbreak_after_Klast
7965         );
7966     }
7967     return;
7968 } ## end sub keep_old_line_breaks
7969
7970 sub weld_containers {
7971
7972     # Called once per file to do any welding operations requested by --weld*
7973     # flags.
7974     my ($self) = @_;
7975
7976     # This count is used to eliminate needless calls for weld checks elsewhere
7977     $total_weld_count = 0;
7978
7979     return if ( $rOpts->{'indent-only'} );
7980     return unless ($rOpts_add_newlines);
7981
7982     # Important: sub 'weld_cuddled_blocks' must be called before
7983     # sub 'weld_nested_containers'. This is because the cuddled option needs to
7984     # use the original _LEVEL_ values of containers, but the weld nested
7985     # containers changes _LEVEL_ of welded containers.
7986
7987     # Here is a good test case to be sure that both cuddling and welding
7988     # are working and not interfering with each other: <<snippets/ce_wn1.in>>
7989
7990     #   perltidy -wn -ce
7991
7992    # if ($BOLD_MATH) { (
7993    #     $labels, $comment,
7994    #     join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
7995    # ) } else { (
7996    #     &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
7997    #     $after
7998    # ) }
7999
8000     $self->weld_cuddled_blocks() if ( %{$rcuddled_block_types} );
8001
8002     if ( $rOpts->{'weld-nested-containers'} ) {
8003
8004         $self->weld_nested_containers();
8005
8006         $self->weld_nested_quotes();
8007     }
8008
8009     #-------------------------------------------------------------
8010     # All welding is done. Finish setting up weld data structures.
8011     #-------------------------------------------------------------
8012
8013     my $rLL                  = $self->[_rLL_];
8014     my $rK_weld_left         = $self->[_rK_weld_left_];
8015     my $rK_weld_right        = $self->[_rK_weld_right_];
8016     my $rweld_len_right_at_K = $self->[_rweld_len_right_at_K_];
8017
8018     my @K_multi_weld;
8019     my @keys = keys %{$rK_weld_right};
8020     $total_weld_count = @keys;
8021
8022     # First pass to process binary welds.
8023     # This loop is processed in unsorted order for efficiency.
8024     foreach my $Kstart (@keys) {
8025         my $Kend = $rK_weld_right->{$Kstart};
8026
8027         # An error here would be due to an incorrect initialization introduced
8028         # in one of the above weld routines, like sub weld_nested.
8029         if ( $Kend <= $Kstart ) {
8030             Fault("Bad weld link: Kend=$Kend <= Kstart=$Kstart\n")
8031               if (DEVEL_MODE);
8032             next;
8033         }
8034
8035         # Set weld values for all tokens this welded pair
8036         foreach ( $Kstart + 1 .. $Kend ) {
8037             $rK_weld_left->{$_} = $Kstart;
8038         }
8039         foreach my $Kx ( $Kstart .. $Kend - 1 ) {
8040             $rK_weld_right->{$Kx} = $Kend;
8041             $rweld_len_right_at_K->{$Kx} =
8042               $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
8043               $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
8044         }
8045
8046         # Remember the leftmost index of welds which continue to the right
8047         if ( defined( $rK_weld_right->{$Kend} )
8048             && !defined( $rK_weld_left->{$Kstart} ) )
8049         {
8050             push @K_multi_weld, $Kstart;
8051         }
8052     }
8053
8054     # Second pass to process chains of welds (these are rare).
8055     # This has to be processed in sorted order.
8056     if (@K_multi_weld) {
8057         my $Kend = -1;
8058         foreach my $Kstart ( sort { $a <=> $b } @K_multi_weld ) {
8059
8060             # Skip any interior K which was originally missing a left link
8061             next if ( $Kstart <= $Kend );
8062
8063             # Find the end of this chain
8064             $Kend = $rK_weld_right->{$Kstart};
8065             my $Knext = $rK_weld_right->{$Kend};
8066             while ( defined($Knext) ) {
8067                 $Kend  = $Knext;
8068                 $Knext = $rK_weld_right->{$Kend};
8069             }
8070
8071             # Set weld values this chain
8072             foreach ( $Kstart + 1 .. $Kend ) {
8073                 $rK_weld_left->{$_} = $Kstart;
8074             }
8075             foreach my $Kx ( $Kstart .. $Kend - 1 ) {
8076                 $rK_weld_right->{$Kx} = $Kend;
8077                 $rweld_len_right_at_K->{$Kx} =
8078                   $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
8079                   $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
8080             }
8081         }
8082     }
8083
8084     return;
8085 } ## end sub weld_containers
8086
8087 sub cumulative_length_before_K {
8088     my ( $self, $KK ) = @_;
8089     my $rLL = $self->[_rLL_];
8090     return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
8091 }
8092
8093 sub weld_cuddled_blocks {
8094     my ($self) = @_;
8095
8096     # Called once per file to handle cuddled formatting
8097
8098     my $rK_weld_left         = $self->[_rK_weld_left_];
8099     my $rK_weld_right        = $self->[_rK_weld_right_];
8100     my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
8101
8102     # This routine implements the -cb flag by finding the appropriate
8103     # closing and opening block braces and welding them together.
8104     return unless ( %{$rcuddled_block_types} );
8105
8106     my $rLL = $self->[_rLL_];
8107     return unless ( defined($rLL) && @{$rLL} );
8108     my $rbreak_container = $self->[_rbreak_container_];
8109
8110     my $K_opening_container = $self->[_K_opening_container_];
8111     my $K_closing_container = $self->[_K_closing_container_];
8112
8113     my $length_to_opening_seqno = sub {
8114         my ($seqno) = @_;
8115         my $KK      = $K_opening_container->{$seqno};
8116         my $lentot  = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
8117         return $lentot;
8118     };
8119     my $length_to_closing_seqno = sub {
8120         my ($seqno) = @_;
8121         my $KK      = $K_closing_container->{$seqno};
8122         my $lentot  = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
8123         return $lentot;
8124     };
8125
8126     my $is_broken_block = sub {
8127
8128         # a block is broken if the input line numbers of the braces differ
8129         # we can only cuddle between broken blocks
8130         my ($seqno) = @_;
8131         my $K_opening = $K_opening_container->{$seqno};
8132         return unless ( defined($K_opening) );
8133         my $K_closing = $K_closing_container->{$seqno};
8134         return unless ( defined($K_closing) );
8135         return $rbreak_container->{$seqno}
8136           || $rLL->[$K_closing]->[_LINE_INDEX_] !=
8137           $rLL->[$K_opening]->[_LINE_INDEX_];
8138     };
8139
8140     # A stack to remember open chains at all levels: This is a hash rather than
8141     # an array for safety because negative levels can occur in files with
8142     # errors.  This allows us to keep processing with negative levels.
8143     # $in_chain{$level} = [$chain_type, $type_sequence];
8144     my %in_chain;
8145     my $CBO = $rOpts->{'cuddled-break-option'};
8146
8147     # loop over structure items to find cuddled pairs
8148     my $level = 0;
8149     my $KNEXT = $self->[_K_first_seq_item_];
8150     while ( defined($KNEXT) ) {
8151         my $KK = $KNEXT;
8152         $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
8153         my $rtoken_vars   = $rLL->[$KK];
8154         my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
8155         if ( !$type_sequence ) {
8156             next if ( $KK == 0 );    # first token in file may not be container
8157
8158             # A fault here implies that an error was made in the little loop at
8159             # the bottom of sub 'respace_tokens' which set the values of
8160             # _KNEXT_SEQ_ITEM_.  Or an error has been introduced in the
8161             # loop control lines above.
8162             Fault("sequence = $type_sequence not defined at K=$KK")
8163               if (DEVEL_MODE);
8164             next;
8165         }
8166
8167         # NOTE: we must use the original levels here. They can get changed
8168         # by sub 'weld_nested_containers', so this routine must be called
8169         # before sub 'weld_nested_containers'.
8170         my $last_level = $level;
8171         $level = $rtoken_vars->[_LEVEL_];
8172
8173         if    ( $level < $last_level ) { $in_chain{$last_level} = undef }
8174         elsif ( $level > $last_level ) { $in_chain{$level}      = undef }
8175
8176         # We are only looking at code blocks
8177         my $token = $rtoken_vars->[_TOKEN_];
8178         my $type  = $rtoken_vars->[_TYPE_];
8179         next unless ( $type eq $token );
8180
8181         if ( $token eq '{' ) {
8182
8183             my $block_type = $rblock_type_of_seqno->{$type_sequence};
8184             if ( !$block_type ) {
8185
8186                 # patch for unrecognized block types which may not be labeled
8187                 my $Kp = $self->K_previous_nonblank($KK);
8188                 while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
8189                     $Kp = $self->K_previous_nonblank($Kp);
8190                 }
8191                 next unless $Kp;
8192                 $block_type = $rLL->[$Kp]->[_TOKEN_];
8193             }
8194             if ( $in_chain{$level} ) {
8195
8196                 # we are in a chain and are at an opening block brace.
8197                 # See if we are welding this opening brace with the previous
8198                 # block brace.  Get their identification numbers:
8199                 my $closing_seqno = $in_chain{$level}->[1];
8200                 my $opening_seqno = $type_sequence;
8201
8202                 # The preceding block must be on multiple lines so that its
8203                 # closing brace will start a new line.
8204                 if ( !$is_broken_block->($closing_seqno) ) {
8205                     next unless ( $CBO == 2 );
8206                     $rbreak_container->{$closing_seqno} = 1;
8207                 }
8208
8209                 # we will let the trailing block be either broken or intact
8210                 ## && $is_broken_block->($opening_seqno);
8211
8212                 # We can weld the closing brace to its following word ..
8213                 my $Ko = $K_closing_container->{$closing_seqno};
8214                 my $Kon;
8215                 if ( defined($Ko) ) {
8216                     $Kon = $self->K_next_nonblank($Ko);
8217                 }
8218
8219                 # ..unless it is a comment
8220                 if ( defined($Kon) && $rLL->[$Kon]->[_TYPE_] ne '#' ) {
8221
8222                     # OK to weld these two tokens...
8223                     $rK_weld_right->{$Ko} = $Kon;
8224                     $rK_weld_left->{$Kon} = $Ko;
8225
8226                     # Set flag that we want to break the next container
8227                     # so that the cuddled line is balanced.
8228                     $rbreak_container->{$opening_seqno} = 1
8229                       if ($CBO);
8230                 }
8231
8232             }
8233             else {
8234
8235                 # We are not in a chain. Start a new chain if we see the
8236                 # starting block type.
8237                 if ( $rcuddled_block_types->{$block_type} ) {
8238                     $in_chain{$level} = [ $block_type, $type_sequence ];
8239                 }
8240                 else {
8241                     $block_type = '*';
8242                     $in_chain{$level} = [ $block_type, $type_sequence ];
8243                 }
8244             }
8245         }
8246         elsif ( $token eq '}' ) {
8247             if ( $in_chain{$level} ) {
8248
8249                 # We are in a chain at a closing brace.  See if this chain
8250                 # continues..
8251                 my $Knn = $self->K_next_code($KK);
8252                 next unless $Knn;
8253
8254                 my $chain_type          = $in_chain{$level}->[0];
8255                 my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
8256                 if (
8257                     $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
8258                   )
8259                 {
8260
8261                     # Note that we do not weld yet because we must wait until
8262                     # we we are sure that an opening brace for this follows.
8263                     $in_chain{$level}->[1] = $type_sequence;
8264                 }
8265                 else { $in_chain{$level} = undef }
8266             }
8267         }
8268     }
8269     return;
8270 } ## end sub weld_cuddled_blocks
8271
8272 sub find_nested_pairs {
8273     my $self = shift;
8274
8275     # This routine is called once per file to do preliminary work needed for
8276     # the --weld-nested option.  This information is also needed for adding
8277     # semicolons.
8278
8279     my $rLL = $self->[_rLL_];
8280     return unless ( defined($rLL) && @{$rLL} );
8281     my $Num = @{$rLL};
8282
8283     my $K_opening_container  = $self->[_K_opening_container_];
8284     my $K_closing_container  = $self->[_K_closing_container_];
8285     my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
8286
8287     # We define an array of pairs of nested containers
8288     my @nested_pairs;
8289
8290     # Names of calling routines can either be marked as 'i' or 'w',
8291     # and they may invoke a sub call with an '->'. We will consider
8292     # any consecutive string of such types as a single unit when making
8293     # weld decisions.  We also allow a leading !
8294     my $is_name_type = {
8295         'i'  => 1,
8296         'w'  => 1,
8297         'U'  => 1,
8298         '->' => 1,
8299         '!'  => 1,
8300     };
8301
8302     # Loop over all closing container tokens
8303     foreach my $inner_seqno ( keys %{$K_closing_container} ) {
8304         my $K_inner_closing = $K_closing_container->{$inner_seqno};
8305
8306         # See if it is immediately followed by another, outer closing token
8307         my $K_outer_closing = $K_inner_closing + 1;
8308         $K_outer_closing += 1
8309           if ( $K_outer_closing < $Num
8310             && $rLL->[$K_outer_closing]->[_TYPE_] eq 'b' );
8311
8312         next unless ( $K_outer_closing < $Num );
8313         my $outer_seqno = $rLL->[$K_outer_closing]->[_TYPE_SEQUENCE_];
8314         next unless ($outer_seqno);
8315         my $token_outer_closing = $rLL->[$K_outer_closing]->[_TOKEN_];
8316         next unless ( $is_closing_token{$token_outer_closing} );
8317
8318         # Now we have to check the opening tokens.
8319         my $K_outer_opening = $K_opening_container->{$outer_seqno};
8320         my $K_inner_opening = $K_opening_container->{$inner_seqno};
8321         next unless defined($K_outer_opening) && defined($K_inner_opening);
8322
8323         my $inner_blocktype = $rblock_type_of_seqno->{$inner_seqno};
8324         my $outer_blocktype = $rblock_type_of_seqno->{$outer_seqno};
8325
8326         # Verify that the inner opening token is the next container after the
8327         # outer opening token.
8328         my $K_io_check = $rLL->[$K_outer_opening]->[_KNEXT_SEQ_ITEM_];
8329         next unless defined($K_io_check);
8330         if ( $K_io_check != $K_inner_opening ) {
8331
8332             # The inner opening container does not immediately follow the outer
8333             # opening container, but we may still allow a weld if they are
8334             # separated by a sub signature.  For example, we may have something
8335             # like this, where $K_io_check may be at the first 'x' instead of
8336             # 'io'.  So we need to hop over the signature and see if we arrive
8337             # at 'io'.
8338
8339             #            oo               io
8340             #             |     x       x |
8341             #   $obj->then( sub ( $code ) {
8342             #       ...
8343             #       return $c->render(text => '', status => $code);
8344             #   } );
8345             #   | |
8346             #  ic oc
8347
8348             next if ( !$inner_blocktype || $inner_blocktype ne 'sub' );
8349             next if $rLL->[$K_io_check]->[_TOKEN_] ne '(';
8350             my $seqno_signature = $rLL->[$K_io_check]->[_TYPE_SEQUENCE_];
8351             next unless defined($seqno_signature);
8352             my $K_signature_closing = $K_closing_container->{$seqno_signature};
8353             next unless defined($K_signature_closing);
8354             my $K_test = $rLL->[$K_signature_closing]->[_KNEXT_SEQ_ITEM_];
8355             next
8356               unless ( defined($K_test) && $K_test == $K_inner_opening );
8357
8358             # OK, we have arrived at 'io' in the above diagram.  We should put
8359             # a limit on the length or complexity of the signature here.  There
8360             # is no perfect way to do this, one way is to put a limit on token
8361             # count.  For consistency with older versions, we should allow a
8362             # signature with a single variable to weld, but not with
8363             # multiple variables.  A single variable as in 'sub ($code) {' can
8364             # have a $Kdiff of 2 to 4, depending on spacing.
8365
8366             # But two variables like 'sub ($v1,$v2) {' can have a diff of 4 to
8367             # 7, depending on spacing. So to keep formatting consistent with
8368             # previous versions, we will also avoid welding if there is a comma
8369             # in the signature.
8370
8371             my $Kdiff = $K_signature_closing - $K_io_check;
8372             next if ( $Kdiff > 4 );
8373
8374             my $saw_comma;
8375             foreach my $KK ( $K_io_check + 1 .. $K_signature_closing - 1 ) {
8376                 if ( $rLL->[$KK]->[_TYPE_] eq ',' ) { $saw_comma = 1; last }
8377             }
8378             next if ($saw_comma);
8379         }
8380
8381         # Yes .. this is a possible nesting pair.
8382         # They can be separated by a small amount.
8383         my $K_diff = $K_inner_opening - $K_outer_opening;
8384
8385         # Count nonblank characters separating them.
8386         if ( $K_diff < 0 ) { next }    # Shouldn't happen
8387         my $nonblank_count = 0;
8388         my $type;
8389         my $is_name;
8390
8391         # Here is an example of a long identifier chain which counts as a
8392         # single nonblank here (this spans about 10 K indexes):
8393         #     if ( !Boucherot::SetOfConnections->new->handler->execute(
8394         #        ^--K_o_o                                             ^--K_i_o
8395         #       @array) )
8396         my $Kn_first = $K_outer_opening;
8397         my $Kn_last_nonblank;
8398         my $saw_comment;
8399         foreach my $Kn ( $K_outer_opening + 1 .. $K_inner_opening ) {
8400             next if ( $rLL->[$Kn]->[_TYPE_] eq 'b' );
8401             if ( !$nonblank_count )        { $Kn_first = $Kn }
8402             if ( $Kn eq $K_inner_opening ) { $nonblank_count++; last; }
8403             $Kn_last_nonblank = $Kn;
8404
8405             # skip chain of identifier tokens
8406             my $last_type    = $type;
8407             my $last_is_name = $is_name;
8408             $type = $rLL->[$Kn]->[_TYPE_];
8409             if ( $type eq '#' ) { $saw_comment = 1; last }
8410             $is_name = $is_name_type->{$type};
8411             next if ( $is_name && $last_is_name );
8412
8413             $nonblank_count++;
8414             last if ( $nonblank_count > 2 );
8415         }
8416
8417         # Do not weld across a comment .. fix for c058.
8418         next if ($saw_comment);
8419
8420         # Patch for b1104: do not weld to a paren preceded by sort/map/grep
8421         # because the special line break rules may cause a blinking state
8422         if (   defined($Kn_last_nonblank)
8423             && $rLL->[$K_inner_opening]->[_TOKEN_] eq '('
8424             && $rLL->[$Kn_last_nonblank]->[_TYPE_] eq 'k' )
8425         {
8426             my $token = $rLL->[$Kn_last_nonblank]->[_TOKEN_];
8427
8428             # Turn off welding at sort/map/grep (
8429             if ( $is_sort_map_grep{$token} ) { $nonblank_count = 10 }
8430         }
8431
8432         if (
8433
8434             # adjacent opening containers, like: do {{
8435             $nonblank_count == 1
8436
8437             # short item following opening paren, like:  fun( yyy (
8438             || (   $nonblank_count == 2
8439                 && $rLL->[$K_outer_opening]->[_TOKEN_] eq '(' )
8440
8441             # anonymous sub + prototype or sig:  )->then( sub ($code) {
8442             # ... but it seems best not to stack two structural blocks, like
8443             # this
8444             #    sub make_anon_with_my_sub { sub {
8445             # because it probably hides the structure a little too much.
8446             || (   $inner_blocktype
8447                 && $inner_blocktype eq 'sub'
8448                 && $rLL->[$Kn_first]->[_TOKEN_] eq 'sub'
8449                 && !$outer_blocktype )
8450           )
8451         {
8452             push @nested_pairs,
8453               [ $inner_seqno, $outer_seqno, $K_inner_closing ];
8454         }
8455         next;
8456     }
8457
8458     # The weld routine expects the pairs in order in the form
8459     #   [$seqno_inner, $seqno_outer]
8460     # And they must be in the same order as the inner closing tokens
8461     # (otherwise, welds of three or more adjacent tokens will not work).  The K
8462     # value of this inner closing token has temporarily been stored for
8463     # sorting.
8464     @nested_pairs =
8465
8466       # Drop the K index after sorting (it would cause trouble downstream)
8467       map { [ $_->[0], $_->[1] ] }
8468
8469       # Sort on the K values
8470       sort { $a->[2] <=> $b->[2] } @nested_pairs;
8471
8472     return \@nested_pairs;
8473 } ## end sub find_nested_pairs
8474
8475 sub match_paren_flag {
8476
8477     # Decide if this paren is excluded by user request:
8478     #   undef matches no parens
8479     #   '*' matches all parens
8480     #   'k' matches only if the previous nonblank token is a perl builtin
8481     #       keyword (such as 'if', 'while'),
8482     #   'K' matches if 'k' does not, meaning if the previous token is not a
8483     #       keyword.
8484     #   'f' matches if the previous token is a function other than a keyword.
8485     #   'F' matches if 'f' does not.
8486     #   'w' matches if either 'k' or 'f' match.
8487     #   'W' matches if 'w' does not.
8488     my ( $self, $KK, $flag ) = @_;
8489
8490     return 0 unless ( defined($flag) );
8491     return 0 if $flag eq '0';
8492     return 1 if $flag eq '1';
8493     return 1 if $flag eq '*';
8494     return 0 unless ( defined($KK) );
8495
8496     my $rLL         = $self->[_rLL_];
8497     my $rtoken_vars = $rLL->[$KK];
8498     my $seqno       = $rtoken_vars->[_TYPE_SEQUENCE_];
8499     return 0 unless ($seqno);
8500     my $token     = $rtoken_vars->[_TOKEN_];
8501     my $K_opening = $KK;
8502     if ( !$is_opening_token{$token} ) {
8503         $K_opening = $self->[_K_opening_container_]->{$seqno};
8504     }
8505     return unless ( defined($K_opening) );
8506
8507     my ( $is_f, $is_k, $is_w );
8508     my $Kp = $self->K_previous_nonblank($K_opening);
8509     if ( defined($Kp) ) {
8510         my $type_p = $rLL->[$Kp]->[_TYPE_];
8511
8512         # keyword?
8513         $is_k = $type_p eq 'k';
8514
8515         # function call?
8516         $is_f = $self->[_ris_function_call_paren_]->{$seqno};
8517
8518         # either keyword or function call?
8519         $is_w = $is_k || $is_f;
8520     }
8521     my $match;
8522     if    ( $flag eq 'k' ) { $match = $is_k }
8523     elsif ( $flag eq 'K' ) { $match = !$is_k }
8524     elsif ( $flag eq 'f' ) { $match = $is_f }
8525     elsif ( $flag eq 'F' ) { $match = !$is_f }
8526     elsif ( $flag eq 'w' ) { $match = $is_w }
8527     elsif ( $flag eq 'W' ) { $match = !$is_w }
8528     return $match;
8529 } ## end sub match_paren_flag
8530
8531 sub is_excluded_weld {
8532
8533     # decide if this weld is excluded by user request
8534     my ( $self, $KK, $is_leading ) = @_;
8535     my $rLL         = $self->[_rLL_];
8536     my $rtoken_vars = $rLL->[$KK];
8537     my $token       = $rtoken_vars->[_TOKEN_];
8538     my $rflags      = $weld_nested_exclusion_rules{$token};
8539     return 0 unless ( defined($rflags) );
8540     my $flag = $is_leading ? $rflags->[0] : $rflags->[1];
8541     return 0 unless ( defined($flag) );
8542     return 1 if $flag eq '*';
8543     return $self->match_paren_flag( $KK, $flag );
8544 } ## end sub is_excluded_weld
8545
8546 # hashes to simplify welding logic
8547 my %type_ok_after_bareword;
8548 my %has_tight_paren;
8549
8550 BEGIN {
8551
8552     # types needed for welding RULE 6
8553     my @q = qw# => -> { ( [ #;
8554     @type_ok_after_bareword{@q} = (1) x scalar(@q);
8555
8556     # these types do not 'like' to be separated from a following paren
8557     @q = qw(w i q Q G C Z U);
8558     @{has_tight_paren}{@q} = (1) x scalar(@q);
8559 }
8560
8561 use constant DEBUG_WELD => 0;
8562
8563 sub setup_new_weld_measurements {
8564
8565     # Define quantities to check for excess line lengths when welded.
8566     # Called by sub 'weld_nested_containers' and sub 'weld_nested_quotes'
8567
8568     my ( $self, $Kouter_opening, $Kinner_opening ) = @_;
8569
8570     # Given indexes of outer and inner opening containers to be welded:
8571     #   $Kouter_opening, $Kinner_opening
8572
8573     # Returns these variables:
8574     #   $new_weld_ok = true (new weld ok) or false (do not start new weld)
8575     #   $starting_indent = starting indentation
8576     #   $starting_lentot = starting cumulative length
8577     #   $msg = diagnostic message for debugging
8578
8579     my $rLL    = $self->[_rLL_];
8580     my $rlines = $self->[_rlines_];
8581
8582     my $starting_level;
8583     my $starting_ci;
8584     my $starting_lentot;
8585     my $maximum_text_length;
8586     my $msg = EMPTY_STRING;
8587
8588     my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
8589     my $rK_range = $rlines->[$iline_oo]->{_rK_range};
8590     my ( $Kfirst, $Klast ) = @{$rK_range};
8591
8592     #-------------------------------------------------------------------------
8593     # We now define a reference index, '$Kref', from which to start measuring
8594     # This choice turns out to be critical for keeping welds stable during
8595     # iterations, so we go through a number of STEPS...
8596     #-------------------------------------------------------------------------
8597
8598     # STEP 1: Our starting guess is to use measure from the first token of the
8599     # current line.  This is usually a good guess.
8600     my $Kref = $Kfirst;
8601
8602     # STEP 2: See if we should go back a little farther
8603     my $Kprev = $self->K_previous_nonblank($Kfirst);
8604     if ( defined($Kprev) ) {
8605
8606         # Avoid measuring from between an opening paren and a previous token
8607         # which should stay close to it ... fixes b1185
8608         my $token_oo  = $rLL->[$Kouter_opening]->[_TOKEN_];
8609         my $type_prev = $rLL->[$Kprev]->[_TYPE_];
8610         if (   $Kouter_opening == $Kfirst
8611             && $token_oo eq '('
8612             && $has_tight_paren{$type_prev} )
8613         {
8614             $Kref = $Kprev;
8615         }
8616
8617         # Back up and count length from a token like '=' or '=>' if -lp
8618         # is used (this fixes b520)
8619         # ...or if a break is wanted before there
8620         elsif ($rOpts_line_up_parentheses
8621             || $want_break_before{$type_prev} )
8622         {
8623
8624             # If there are other sequence items between the start of this line
8625             # and the opening token in question, then do not include tokens on
8626             # the previous line in length calculations.  This check added to
8627             # fix case b1174 which had a '?' on the line
8628             my $no_previous_seq_item = $Kref == $Kouter_opening
8629               || $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_] == $Kouter_opening;
8630
8631             if ( $no_previous_seq_item
8632                 && substr( $type_prev, 0, 1 ) eq '=' )
8633             {
8634                 $Kref = $Kprev;
8635
8636                 # Fix for b1144 and b1112: backup to the first nonblank
8637                 # character before the =>, or to the start of its line.
8638                 if ( $type_prev eq '=>' ) {
8639                     my $iline_prev    = $rLL->[$Kprev]->[_LINE_INDEX_];
8640                     my $rK_range_prev = $rlines->[$iline_prev]->{_rK_range};
8641                     my ( $Kfirst_prev, $Klast_prev ) = @{$rK_range_prev};
8642                     foreach my $KK ( reverse( $Kfirst_prev .. $Kref - 1 ) ) {
8643                         next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
8644                         $Kref = $KK;
8645                         last;
8646                     }
8647                 }
8648             }
8649         }
8650     }
8651
8652     # STEP 3: Now look ahead for a ternary and, if found, use it.
8653     # This fixes case b1182.
8654     # Also look for a ')' at the same level and, if found, use it.
8655     # This fixes case b1224.
8656     if ( $Kref < $Kouter_opening ) {
8657         my $Knext    = $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_];
8658         my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
8659         while ( $Knext < $Kouter_opening ) {
8660             if ( $rLL->[$Knext]->[_LEVEL_] == $level_oo ) {
8661                 if (   $is_ternary{ $rLL->[$Knext]->[_TYPE_] }
8662                     || $rLL->[$Knext]->[_TOKEN_] eq ')' )
8663                 {
8664                     $Kref = $Knext;
8665                     last;
8666                 }
8667             }
8668             $Knext = $rLL->[$Knext]->[_KNEXT_SEQ_ITEM_];
8669         }
8670     }
8671
8672     # Define the starting measurements we will need
8673     $starting_lentot =
8674       $Kref <= 0 ? 0 : $rLL->[ $Kref - 1 ]->[_CUMULATIVE_LENGTH_];
8675     $starting_level = $rLL->[$Kref]->[_LEVEL_];
8676     $starting_ci    = $rLL->[$Kref]->[_CI_LEVEL_];
8677
8678     $maximum_text_length = $maximum_text_length_at_level[$starting_level] -
8679       $starting_ci * $rOpts_continuation_indentation;
8680
8681     # STEP 4: Switch to using the outer opening token as the reference
8682     # point if a line break before it would make a longer line.
8683     # Fixes case b1055 and is also an alternate fix for b1065.
8684     my $starting_level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
8685     if ( $Kref < $Kouter_opening ) {
8686         my $starting_ci_oo = $rLL->[$Kouter_opening]->[_CI_LEVEL_];
8687         my $lentot_oo = $rLL->[ $Kouter_opening - 1 ]->[_CUMULATIVE_LENGTH_];
8688         my $maximum_text_length_oo =
8689           $maximum_text_length_at_level[$starting_level_oo] -
8690           $starting_ci_oo * $rOpts_continuation_indentation;
8691
8692         # The excess length to any cumulative length K = lenK is either
8693         #     $excess = $lenk - ($lentot    + $maximum_text_length),     or
8694         #     $excess = $lenk - ($lentot_oo + $maximum_text_length_oo),
8695         # so the worst case (maximum excess) corresponds to the configuration
8696         # with minimum value of the sum: $lentot + $maximum_text_length
8697         if ( $lentot_oo + $maximum_text_length_oo <
8698             $starting_lentot + $maximum_text_length )
8699         {
8700             $Kref                = $Kouter_opening;
8701             $starting_level      = $starting_level_oo;
8702             $starting_ci         = $starting_ci_oo;
8703             $starting_lentot     = $lentot_oo;
8704             $maximum_text_length = $maximum_text_length_oo;
8705         }
8706     }
8707
8708     my $new_weld_ok = 1;
8709
8710     # STEP 5, fix b1020: Avoid problem areas with the -wn -lp combination.  The
8711     # combination -wn -lp -dws -naws does not work well and can cause blinkers.
8712     # It will probably only occur in stress testing.  For this situation we
8713     # will only start a new weld if we start at a 'good' location.
8714     # - Added 'if' to fix case b1032.
8715     # - Require blank before certain previous characters to fix b1111.
8716     # - Add ';' to fix case b1139
8717     # - Convert from '$ok_to_weld' to '$new_weld_ok' to fix b1162.
8718     # - relaxed constraints for b1227
8719     if (   $starting_ci
8720         && $rOpts_line_up_parentheses
8721         && $rOpts_delete_old_whitespace
8722         && !$rOpts_add_whitespace
8723         && defined($Kprev) )
8724     {
8725         my $type_first  = $rLL->[$Kfirst]->[_TYPE_];
8726         my $token_first = $rLL->[$Kfirst]->[_TOKEN_];
8727         my $type_prev   = $rLL->[$Kprev]->[_TYPE_];
8728         my $type_pp     = 'b';
8729         if ( $Kprev >= 0 ) { $type_pp = $rLL->[ $Kprev - 1 ]->[_TYPE_] }
8730         unless (
8731                $type_prev =~ /^[\,\.\;]/
8732             || $type_prev =~ /^[=\{\[\(\L]/
8733             && ( $type_pp eq 'b' || $type_pp eq '}' || $type_first eq 'k' )
8734             || $type_first =~ /^[=\,\.\;\{\[\(\L]/
8735             || $type_first eq '||'
8736             || (
8737                 $type_first eq 'k'
8738                 && (   $token_first eq 'if'
8739                     || $token_first eq 'or' )
8740             )
8741           )
8742         {
8743             $msg =
8744 "Skipping weld: poor break with -lp and ci at type_first='$type_first' type_prev='$type_prev' type_pp=$type_pp\n";
8745             $new_weld_ok = 0;
8746         }
8747     }
8748     return ( $new_weld_ok, $maximum_text_length, $starting_lentot, $msg );
8749 } ## end sub setup_new_weld_measurements
8750
8751 sub excess_line_length_for_Krange {
8752     my ( $self, $Kfirst, $Klast ) = @_;
8753
8754     # returns $excess_length =
8755     #   by how many characters a line composed of tokens $Kfirst .. $Klast will
8756     #   exceed the allowed line length
8757
8758     my $rLL = $self->[_rLL_];
8759     my $length_before_Kfirst =
8760       $Kfirst <= 0
8761       ? 0
8762       : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
8763
8764     # backup before a side comment if necessary
8765     my $Kend = $Klast;
8766     if (   $rOpts_ignore_side_comment_lengths
8767         && $rLL->[$Klast]->[_TYPE_] eq '#' )
8768     {
8769         my $Kprev = $self->K_previous_nonblank($Klast);
8770         if ( defined($Kprev) && $Kprev >= $Kfirst ) { $Kend = $Kprev }
8771     }
8772
8773     # get the length of the text
8774     my $length = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - $length_before_Kfirst;
8775
8776     # get the size of the text window
8777     my $level           = $rLL->[$Kfirst]->[_LEVEL_];
8778     my $ci_level        = $rLL->[$Kfirst]->[_CI_LEVEL_];
8779     my $max_text_length = $maximum_text_length_at_level[$level] -
8780       $ci_level * $rOpts_continuation_indentation;
8781
8782     my $excess_length = $length - $max_text_length;
8783
8784     DEBUG_WELD
8785       && print
8786 "Kfirst=$Kfirst, Klast=$Klast, Kend=$Kend, level=$level, ci=$ci_level, max_text_length=$max_text_length, length=$length\n";
8787     return ($excess_length);
8788 } ## end sub excess_line_length_for_Krange
8789
8790 sub weld_nested_containers {
8791     my ($self) = @_;
8792
8793     # Called once per file for option '--weld-nested-containers'
8794
8795     my $rK_weld_left  = $self->[_rK_weld_left_];
8796     my $rK_weld_right = $self->[_rK_weld_right_];
8797
8798     # This routine implements the -wn flag by "welding together"
8799     # the nested closing and opening tokens which were previously
8800     # identified by sub 'find_nested_pairs'.  "welding" simply
8801     # involves setting certain hash values which will be checked
8802     # later during formatting.
8803
8804     my $rLL                       = $self->[_rLL_];
8805     my $rlines                    = $self->[_rlines_];
8806     my $K_opening_container       = $self->[_K_opening_container_];
8807     my $K_closing_container       = $self->[_K_closing_container_];
8808     my $rblock_type_of_seqno      = $self->[_rblock_type_of_seqno_];
8809     my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
8810     my $ris_asub_block            = $self->[_ris_asub_block_];
8811     my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
8812
8813     # Find nested pairs of container tokens for any welding.
8814     my $rnested_pairs = $self->find_nested_pairs();
8815
8816     # Return unless there are nested pairs to weld
8817     return unless defined($rnested_pairs) && @{$rnested_pairs};
8818
8819     # NOTE: It would be nice to apply RULE 5 right here by deleting unwanted
8820     # pairs.  But it isn't clear if this is possible because we don't know
8821     # which sequences might actually start a weld.
8822
8823     # Setup a hash to avoid instabilities with combination -lp -wn -pvt=2.
8824     # We do this by reducing -vt=2 to -vt=1 where there could be a conflict
8825     # with welding at the same tokens.
8826     # See issues b1338, b1339, b1340, b1341, b1342, b1343.
8827     if ($rOpts_line_up_parentheses) {
8828
8829         # NOTE: just parens for now but this could be applied to all types if
8830         # necessary.
8831         if ( $opening_vertical_tightness{'('} == 2 ) {
8832             my $rreduce_vertical_tightness_by_seqno =
8833               $self->[_rreduce_vertical_tightness_by_seqno_];
8834             foreach my $item ( @{$rnested_pairs} ) {
8835                 my ( $inner_seqno, $outer_seqno ) = @{$item};
8836                 if ( !$ris_excluded_lp_container->{$outer_seqno} ) {
8837
8838                     # Set a flag which means that if a token has -vt=2
8839                     # then reduce it to -vt=1.
8840                     $rreduce_vertical_tightness_by_seqno->{$outer_seqno} = 1;
8841                 }
8842             }
8843         }
8844     }
8845
8846     my $rOpts_break_at_old_method_breakpoints =
8847       $rOpts->{'break-at-old-method-breakpoints'};
8848
8849     # This array will hold the sequence numbers of the tokens to be welded.
8850     my @welds;
8851
8852     # Variables needed for estimating line lengths
8853     my $maximum_text_length;    # maximum spaces available for text
8854     my $starting_lentot;        # cumulative text to start of current line
8855
8856     my $iline_outer_opening   = -1;
8857     my $weld_count_this_start = 0;
8858
8859     # OLD: $single_line_tol added to fix cases b1180 b1181
8860     #       = $rOpts_continuation_indentation > $rOpts_indent_columns ? 1 : 0;
8861     # NEW: $single_line_tol=0;  fixes b1212 and b1180-1181 work now
8862     my $single_line_tol = 0;
8863
8864     my $multiline_tol = $single_line_tol + 1 +
8865       max( $rOpts_indent_columns, $rOpts_continuation_indentation );
8866
8867     # Define a welding cutoff level: do not start a weld if the inside
8868     # container level equals or exceeds this level.
8869
8870     # We use the minimum of two criteria, either of which may be more
8871     # restrictive.  The 'alpha' value is more restrictive in (b1206, b1252) and
8872     # the 'beta' value is more restrictive in other cases (b1243).
8873
8874     my $weld_cutoff_level = min( $stress_level_alpha, $stress_level_beta + 3 );
8875
8876     # The vertical tightness flags can throw off line length calculations.
8877     # This patch was added to fix instability issue b1284.
8878     # It works to always use a tol of 1 for 1 line block length tests, but
8879     # this restricted value keeps test case wn6.wn working as before.
8880     # It may be necessary to include '[' and '{' here in the future.
8881     my $one_line_tol = $opening_vertical_tightness{'('} ? 1 : 0;
8882
8883     my $length_to_opening_seqno = sub {
8884         my ($seqno) = @_;
8885         my $KK      = $K_opening_container->{$seqno};
8886         my $lentot  = defined($KK)
8887           && $KK > 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
8888         return $lentot;
8889     };
8890
8891     my $length_to_closing_seqno = sub {
8892         my ($seqno) = @_;
8893         my $KK      = $K_closing_container->{$seqno};
8894         my $lentot  = defined($KK)
8895           && $KK > 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
8896         return $lentot;
8897     };
8898
8899     # Abbreviations:
8900     #  _oo=outer opening, i.e. first of  { {
8901     #  _io=inner opening, i.e. second of { {
8902     #  _oc=outer closing, i.e. second of } {
8903     #  _ic=inner closing, i.e. first of  } }
8904
8905     my $previous_pair;
8906
8907     # Main loop over nested pairs...
8908     # We are working from outermost to innermost pairs so that
8909     # level changes will be complete when we arrive at the inner pairs.
8910     while ( my $item = pop( @{$rnested_pairs} ) ) {
8911         my ( $inner_seqno, $outer_seqno ) = @{$item};
8912
8913         my $Kouter_opening = $K_opening_container->{$outer_seqno};
8914         my $Kinner_opening = $K_opening_container->{$inner_seqno};
8915         my $Kouter_closing = $K_closing_container->{$outer_seqno};
8916         my $Kinner_closing = $K_closing_container->{$inner_seqno};
8917
8918         # RULE: do not weld if inner container has <= 3 tokens unless the next
8919         # token is a heredoc (so we know there will be multiple lines)
8920         if ( $Kinner_closing - $Kinner_opening <= 4 ) {
8921             my $Knext_nonblank = $self->K_next_nonblank($Kinner_opening);
8922             next unless defined($Knext_nonblank);
8923             my $type = $rLL->[$Knext_nonblank]->[_TYPE_];
8924             next unless ( $type eq 'h' );
8925         }
8926
8927         my $outer_opening = $rLL->[$Kouter_opening];
8928         my $inner_opening = $rLL->[$Kinner_opening];
8929         my $outer_closing = $rLL->[$Kouter_closing];
8930         my $inner_closing = $rLL->[$Kinner_closing];
8931
8932         # RULE: do not weld to a hash brace.  The reason is that it has a very
8933         # strong bond strength to the next token, so a line break after it
8934         # may not work.  Previously we allowed welding to something like @{
8935         # but that caused blinking states (cases b751, b779).
8936         if ( $inner_opening->[_TYPE_] eq 'L' ) {
8937             next;
8938         }
8939
8940         # RULE: do not weld to a square bracket which does not contain commas
8941         if ( $inner_opening->[_TYPE_] eq '[' ) {
8942             my $rtype_count = $self->[_rtype_count_by_seqno_]->{$inner_seqno};
8943             next unless ($rtype_count);
8944             my $comma_count = $rtype_count->{','};
8945             next unless ($comma_count);
8946
8947             # Do not weld if there is text before a '[' such as here:
8948             #      curr_opt ( @beg [2,5] )
8949             # It will not break into the desired sandwich structure.
8950             # This fixes case b109, 110.
8951             my $Kdiff = $Kinner_opening - $Kouter_opening;
8952             next if ( $Kdiff > 2 );
8953             next
8954               if ( $Kdiff == 2
8955                 && $rLL->[ $Kouter_opening + 1 ]->[_TYPE_] ne 'b' );
8956
8957         }
8958
8959         # RULE: Avoid welding under stress.  The idea is that we need to have a
8960         # little space* within a welded container to avoid instability.  Note
8961         # that after each weld the level values are reduced, so long multiple
8962         # welds can still be made.  This rule will seldom be a limiting factor
8963         # in actual working code. Fixes b1206, b1243.
8964         my $inner_level = $inner_opening->[_LEVEL_];
8965         if ( $inner_level >= $weld_cutoff_level ) { next }
8966
8967         # Set flag saying if this pair starts a new weld
8968         my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
8969
8970         # Set flag saying if this pair is adjacent to the previous nesting pair
8971         # (even if previous pair was rejected as a weld)
8972         my $touch_previous_pair =
8973           defined($previous_pair) && $outer_seqno == $previous_pair->[0];
8974         $previous_pair = $item;
8975
8976         my $do_not_weld_rule = 0;
8977         my $Msg              = EMPTY_STRING;
8978         my $is_one_line_weld;
8979
8980         my $iline_oo = $outer_opening->[_LINE_INDEX_];
8981         my $iline_io = $inner_opening->[_LINE_INDEX_];
8982         my $iline_ic = $inner_closing->[_LINE_INDEX_];
8983         my $iline_oc = $outer_closing->[_LINE_INDEX_];
8984         my $token_oo = $outer_opening->[_TOKEN_];
8985         my $token_io = $inner_opening->[_TOKEN_];
8986
8987         my $is_multiline_weld =
8988              $iline_oo == $iline_io
8989           && $iline_ic == $iline_oc
8990           && $iline_io != $iline_ic;
8991
8992         if (DEBUG_WELD) {
8993             my $len_oo = $rLL->[$Kouter_opening]->[_CUMULATIVE_LENGTH_];
8994             my $len_io = $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_];
8995             $Msg .= <<EOM;
8996 Pair seqo=$outer_seqno seqi=$inner_seqno  lines: loo=$iline_oo lio=$iline_io lic=$iline_ic loc=$iline_oc
8997 Koo=$Kouter_opening Kio=$Kinner_opening Kic=$Kinner_closing Koc=$Kouter_closing lenoo=$len_oo lenio=$len_io
8998 tokens '$token_oo' .. '$token_io'
8999 EOM
9000         }
9001
9002         # DO-NOT-WELD RULE 0:
9003         # Avoid a new paren-paren weld if inner parens are 'sheared' (separated
9004         # by one line).  This can produce instabilities (fixes b1250 b1251
9005         # 1256).
9006         if (  !$is_multiline_weld
9007             && $iline_ic == $iline_io + 1
9008             && $token_oo eq '('
9009             && $token_io eq '(' )
9010         {
9011             if (DEBUG_WELD) {
9012                 $Msg .= "RULE 0: Not welding due to sheared inner parens\n";
9013                 print $Msg;
9014             }
9015             next;
9016         }
9017
9018         # If this pair is not adjacent to the previous pair (skipped or not),
9019         # then measure lengths from the start of line of oo.
9020         if (
9021             !$touch_previous_pair
9022
9023             # Also do this if restarting at a new line; fixes case b965, s001
9024             || ( !$weld_count_this_start && $iline_oo > $iline_outer_opening )
9025           )
9026         {
9027
9028             # Remember the line we are using as a reference
9029             $iline_outer_opening   = $iline_oo;
9030             $weld_count_this_start = 0;
9031
9032             ( my $new_weld_ok, $maximum_text_length, $starting_lentot, my $msg )
9033               = $self->setup_new_weld_measurements( $Kouter_opening,
9034                 $Kinner_opening );
9035
9036             if (
9037                 !$new_weld_ok
9038                 && (   $iline_oo != $iline_io
9039                     || $iline_ic != $iline_oc )
9040               )
9041             {
9042                 if (DEBUG_WELD) { print $msg}
9043                 next;
9044             }
9045
9046             my $rK_range = $rlines->[$iline_oo]->{_rK_range};
9047             my ( $Kfirst, $Klast ) = @{$rK_range};
9048
9049             # An existing one-line weld is a line in which
9050             # (1) the containers are all on one line, and
9051             # (2) the line does not exceed the allowable length
9052             if ( $iline_oo == $iline_oc ) {
9053
9054                 # All the tokens are on one line, now check their length.
9055                 # Start with the full line index range. We will reduce this
9056                 # in the coding below in some cases.
9057                 my $Kstart = $Kfirst;
9058                 my $Kstop  = $Klast;
9059
9060                 # Note that the following minimal choice for measuring will
9061                 # work and will not cause any instabilities because it is
9062                 # invariant:
9063
9064                 ##  my $Kstart = $Kouter_opening;
9065                 ##  my $Kstop  = $Kouter_closing;
9066
9067                 # But that can lead to some undesirable welds.  So a little
9068                 # more complicated method has been developed.
9069
9070                 # We are trying to avoid creating bad two-line welds when we are
9071                 # working on long, previously un-welded input text, such as
9072
9073                 # INPUT (example of a long input line weld candidate):
9074                 ## $mutation->transpos( $self->RNA->position($mutation->label, $atg_label));
9075
9076                 #  GOOD two-line break: (not welded; result marked too long):
9077                 ## $mutation->transpos(
9078                 ##                 $self->RNA->position($mutation->label, $atg_label));
9079
9080                 #  BAD two-line break: (welded; result if we weld):
9081                 ## $mutation->transpos($self->RNA->position(
9082                 ##                                      $mutation->label, $atg_label));
9083
9084                 # We can only get an approximate estimate of the final length,
9085                 # since the line breaks may change, and for -lp mode because
9086                 # even the indentation is not yet known.
9087
9088                 my $level_first = $rLL->[$Kfirst]->[_LEVEL_];
9089                 my $level_last  = $rLL->[$Klast]->[_LEVEL_];
9090                 my $level_oo    = $rLL->[$Kouter_opening]->[_LEVEL_];
9091                 my $level_oc    = $rLL->[$Kouter_closing]->[_LEVEL_];
9092
9093                 # - measure to the end of the original line if balanced
9094                 # - measure to the closing container if unbalanced (fixes b1230)
9095                 #if ( $level_first != $level_last ) { $Kstop = $Kouter_closing }
9096                 if ( $level_oc > $level_last ) { $Kstop = $Kouter_closing }
9097
9098                 # - measure from the start of the original line if balanced
9099                 # - measure from the most previous token with same level
9100                 #   if unbalanced (b1232)
9101                 if ( $Kouter_opening > $Kfirst && $level_oo > $level_first ) {
9102                     $Kstart = $Kouter_opening;
9103
9104                     foreach
9105                       my $KK ( reverse( $Kfirst + 1 .. $Kouter_opening - 1 ) )
9106                     {
9107                         next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
9108                         last if ( $rLL->[$KK]->[_LEVEL_] < $level_oo );
9109                         $Kstart = $KK;
9110                     }
9111                 }
9112
9113                 my $excess =
9114                   $self->excess_line_length_for_Krange( $Kstart, $Kstop );
9115
9116                 # Coding simplified here for case b1219.
9117                 # Increased tol from 0 to 1 when pvt>0 to fix b1284.
9118                 $is_one_line_weld = $excess <= $one_line_tol;
9119             }
9120
9121             # DO-NOT-WELD RULE 1:
9122             # Do not weld something that looks like the start of a two-line
9123             # function call, like this: <<snippets/wn6.in>>
9124             #    $trans->add_transformation(
9125             #        PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
9126             # We will look for a semicolon after the closing paren.
9127
9128             # We want to weld something complex, like this though
9129             # my $compass = uc( opposite_direction( line_to_canvas_direction(
9130             #     @{ $coords[0] }, @{ $coords[1] } ) ) );
9131             # Otherwise we will get a 'blinker'. For example, the following
9132             # would become a blinker without this rule:
9133             #        $Self->_Add( $SortOrderDisplay{ $Field
9134             #              ->GenerateFieldForSelectSQL() } );
9135             # But it is okay to weld a two-line statement if it looks like
9136             # it was already welded, meaning that the two opening containers are
9137             # on a different line that the two closing containers.  This is
9138             # necessary to prevent blinking of something like this with
9139             # perltidy -wn -pbp (starting indentation two levels deep):
9140
9141             # $top_label->set_text( gettext(
9142             #    "Unable to create personal directory - check permissions.") );
9143             if (   $iline_oc == $iline_oo + 1
9144                 && $iline_io == $iline_ic
9145                 && $token_oo eq '(' )
9146             {
9147
9148                 # Look for following semicolon...
9149                 my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
9150                 my $next_nonblank_type =
9151                   defined($Knext_nonblank)
9152                   ? $rLL->[$Knext_nonblank]->[_TYPE_]
9153                   : 'b';
9154                 if ( $next_nonblank_type eq ';' ) {
9155
9156                     # Then do not weld if no other containers between inner
9157                     # opening and closing.
9158                     my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
9159                     if ( $Knext_seq_item == $Kinner_closing ) {
9160                         $do_not_weld_rule = 1;
9161                     }
9162                 }
9163             }
9164         } ## end starting new weld sequence
9165
9166         else {
9167
9168             # set the 1-line flag if continuing a weld sequence; fixes b1239
9169             $is_one_line_weld = ( $iline_oo == $iline_oc );
9170         }
9171
9172         # DO-NOT-WELD RULE 2:
9173         # Do not weld an opening paren to an inner one line brace block
9174         # We will just use old line numbers for this test and require
9175         # iterations if necessary for convergence
9176
9177         # For example, otherwise we could cause the opening paren
9178         # in the following example to separate from the caller name
9179         # as here:
9180
9181         #    $_[0]->code_handler
9182         #      ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
9183
9184         # Here is another example where we do not want to weld:
9185         #  $wrapped->add_around_modifier(
9186         #    sub { push @tracelog => 'around 1'; $_[0]->(); } );
9187
9188         # If the one line sub block gets broken due to length or by the
9189         # user, then we can weld.  The result will then be:
9190         # $wrapped->add_around_modifier( sub {
9191         #    push @tracelog => 'around 1';
9192         #    $_[0]->();
9193         # } );
9194
9195         # Updated to fix cases b1082 b1102 b1106 b1115:
9196         # Also, do not weld to an intact inner block if the outer opening token
9197         # is on a different line. For example, this prevents oscillation
9198         # between these two states in case b1106:
9199
9200         #    return map{
9201         #        ($_,[$self->$_(@_[1..$#_])])
9202         #    }@every;
9203
9204         #    return map { (
9205         #        $_, [ $self->$_( @_[ 1 .. $#_ ] ) ]
9206         #    ) } @every;
9207
9208         # The effect of this change on typical code is very minimal.  Sometimes
9209         # it may take a second iteration to converge, but this gives protection
9210         # against blinking.
9211         if (   !$do_not_weld_rule
9212             && !$is_one_line_weld
9213             && $iline_ic == $iline_io )
9214         {
9215             $do_not_weld_rule = 2
9216               if ( $token_oo eq '(' || $iline_oo != $iline_io );
9217         }
9218
9219         # DO-NOT-WELD RULE 2A:
9220         # Do not weld an opening asub brace in -lp mode if -asbl is set. This
9221         # helps avoid instabilities in one-line block formation, and fixes
9222         # b1241.  Previously, the '$is_one_line_weld' flag was tested here
9223         # instead of -asbl, and this fixed most cases. But it turns out that
9224         # the real problem was the -asbl flag, and switching to this was
9225         # necessary to fixe b1268.  This also fixes b1269, b1277, b1278.
9226         if (
9227             !$do_not_weld_rule
9228             ##&& $is_one_line_weld
9229             && $rOpts_line_up_parentheses
9230             && $rOpts_asbl
9231             && $ris_asub_block->{$outer_seqno}
9232           )
9233         {
9234             $do_not_weld_rule = '2A';
9235         }
9236
9237         # DO-NOT-WELD RULE 3:
9238         # Do not weld if this makes our line too long.
9239         # Use a tolerance which depends on if the old tokens were welded
9240         # (fixes cases b746 b748 b749 b750 b752 b753 b754 b755 b756 b758 b759)
9241         if ( !$do_not_weld_rule ) {
9242
9243             # Measure to a little beyond the inner opening token if it is
9244             # followed by a bare word, which may have unusual line break rules.
9245
9246             # NOTE: Originally this was OLD RULE 6: do not weld to a container
9247             # which is followed on the same line by an unknown bareword token.
9248             # This can cause blinkers (cases b626, b611).  But OK to weld one
9249             # line welds to fix cases b1057 b1064.  For generality, OLD RULE 6
9250             # has been merged into RULE 3 here to also fix cases b1078 b1091.
9251
9252             my $K_for_length = $Kinner_opening;
9253             my $Knext_io     = $self->K_next_nonblank($Kinner_opening);
9254             next unless ( defined($Knext_io) );    # shouldn't happen
9255             my $type_io_next = $rLL->[$Knext_io]->[_TYPE_];
9256
9257             # Note: may need to eventually also include other types here,
9258             # such as 'Z' and 'Y':   if ($type_io_next =~ /^[ZYw]$/) {
9259             if ( $type_io_next eq 'w' ) {
9260                 my $Knext_io2 = $self->K_next_nonblank($Knext_io);
9261                 next unless ( defined($Knext_io2) );
9262                 my $type_io_next2 = $rLL->[$Knext_io2]->[_TYPE_];
9263                 if ( !$type_ok_after_bareword{$type_io_next2} ) {
9264                     $K_for_length = $Knext_io2;
9265                 }
9266             }
9267
9268             # Use a tolerance for welds over multiple lines to avoid blinkers.
9269             # We can use zero tolerance if it looks like we are working on an
9270             # existing weld.
9271             my $tol =
9272                 $is_one_line_weld || $is_multiline_weld
9273               ? $single_line_tol
9274               : $multiline_tol;
9275
9276             # By how many characters does this exceed the text window?
9277             my $excess =
9278               $self->cumulative_length_before_K($K_for_length) -
9279               $starting_lentot + 1 + $tol -
9280               $maximum_text_length;
9281
9282             # Old patch: Use '>=0' instead of '> 0' here to fix cases b995 b998
9283             # b1000 b1001 b1007 b1008 b1009 b1010 b1011 b1012 b1016 b1017 b1018
9284             # Revised patch: New tolerance definition allows going back to '> 0'
9285             # here.  This fixes case b1124.  See also cases b1087 and b1087a.
9286             if ( $excess > 0 ) { $do_not_weld_rule = 3 }
9287
9288             if (DEBUG_WELD) {
9289                 $Msg .=
9290 "RULE 3 test: excess length to K=$Kinner_opening is $excess > 0 with tol= $tol ?) \n";
9291             }
9292         }
9293
9294         # DO-NOT-WELD RULE 4; implemented for git#10:
9295         # Do not weld an opening -ce brace if the next container is on a single
9296         # line, different from the opening brace. (This is very rare).  For
9297         # example, given the following with -ce, we will avoid joining the {
9298         # and [
9299
9300         #  } else {
9301         #      [ $_, length($_) ]
9302         #  }
9303
9304         # because this would produce a terminal one-line block:
9305
9306         #  } else { [ $_, length($_) ]  }
9307
9308         # which may not be what is desired. But given this input:
9309
9310         #  } else { [ $_, length($_) ]  }
9311
9312         # then we will do the weld and retain the one-line block
9313         if ( !$do_not_weld_rule && $rOpts->{'cuddled-else'} ) {
9314             my $block_type = $rblock_type_of_seqno->{$outer_seqno};
9315             if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) {
9316                 my $io_line = $inner_opening->[_LINE_INDEX_];
9317                 my $ic_line = $inner_closing->[_LINE_INDEX_];
9318                 my $oo_line = $outer_opening->[_LINE_INDEX_];
9319                 if ( $oo_line < $io_line && $ic_line == $io_line ) {
9320                     $do_not_weld_rule = 4;
9321                 }
9322             }
9323         }
9324
9325         # DO-NOT-WELD RULE 5: do not include welds excluded by user
9326         if (
9327               !$do_not_weld_rule
9328             && %weld_nested_exclusion_rules
9329             && ( $self->is_excluded_weld( $Kouter_opening, $starting_new_weld )
9330                 || $self->is_excluded_weld( $Kinner_opening, 0 ) )
9331           )
9332         {
9333             $do_not_weld_rule = 5;
9334         }
9335
9336         # DO-NOT-WELD RULE 6: This has been merged into RULE 3 above.
9337
9338         # DO-NOT-WELD RULE 7: Do not weld if this conflicts with -bom
9339         # (case b973)
9340         if (  !$do_not_weld_rule
9341             && $rOpts_break_at_old_method_breakpoints
9342             && $iline_io > $iline_oo )
9343         {
9344
9345             foreach my $iline ( $iline_oo + 1 .. $iline_io ) {
9346                 my $rK_range = $rlines->[$iline]->{_rK_range};
9347                 next unless defined($rK_range);
9348                 my ( $Kfirst, $Klast ) = @{$rK_range};
9349                 next unless defined($Kfirst);
9350                 if ( $rLL->[$Kfirst]->[_TYPE_] eq '->' ) {
9351                     $do_not_weld_rule = 7;
9352                     last;
9353                 }
9354             }
9355         }
9356
9357         if ($do_not_weld_rule) {
9358
9359             # After neglecting a pair, we start measuring from start of point
9360             # io ... but not if previous type does not like to be separated
9361             # from its container (fixes case b1184)
9362             my $Kprev     = $self->K_previous_nonblank($Kinner_opening);
9363             my $type_prev = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'w';
9364             if ( !$has_tight_paren{$type_prev} ) {
9365                 my $starting_level    = $inner_opening->[_LEVEL_];
9366                 my $starting_ci_level = $inner_opening->[_CI_LEVEL_];
9367                 $starting_lentot =
9368                   $self->cumulative_length_before_K($Kinner_opening);
9369                 $maximum_text_length =
9370                   $maximum_text_length_at_level[$starting_level] -
9371                   $starting_ci_level * $rOpts_continuation_indentation;
9372             }
9373
9374             if (DEBUG_WELD) {
9375                 $Msg .= "Not welding due to RULE $do_not_weld_rule\n";
9376                 print $Msg;
9377             }
9378
9379             # Normally, a broken pair should not decrease indentation of
9380             # intermediate tokens:
9381             ##      if ( $last_pair_broken ) { next }
9382             # However, for long strings of welded tokens, such as '{{{{{{...'
9383             # we will allow broken pairs to also remove indentation.
9384             # This will keep very long strings of opening and closing
9385             # braces from marching off to the right.  We will do this if the
9386             # number of tokens in a weld before the broken weld is 4 or more.
9387             # This rule will mainly be needed for test scripts, since typical
9388             # welds have fewer than about 4 welded tokens.
9389             if ( !@welds || @{ $welds[-1] } < 4 ) { next }
9390         }
9391
9392         # otherwise start new weld ...
9393         elsif ($starting_new_weld) {
9394             $weld_count_this_start++;
9395             if (DEBUG_WELD) {
9396                 $Msg .= "Starting new weld\n";
9397                 print $Msg;
9398             }
9399             push @welds, $item;
9400
9401             $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
9402             $rK_weld_left->{$Kinner_opening}  = $Kouter_opening;
9403
9404             $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
9405             $rK_weld_left->{$Kouter_closing}  = $Kinner_closing;
9406         }
9407
9408         # ... or extend current weld
9409         else {
9410             $weld_count_this_start++;
9411             if (DEBUG_WELD) {
9412                 $Msg .= "Extending current weld\n";
9413                 print $Msg;
9414             }
9415             unshift @{ $welds[-1] }, $inner_seqno;
9416             $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
9417             $rK_weld_left->{$Kinner_opening}  = $Kouter_opening;
9418
9419             $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
9420             $rK_weld_left->{$Kouter_closing}  = $Kinner_closing;
9421         }
9422
9423         # After welding, reduce the indentation level if all intermediate tokens
9424         my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
9425         if ( $dlevel != 0 ) {
9426             my $Kstart = $Kinner_opening;
9427             my $Kstop  = $Kinner_closing;
9428             foreach my $KK ( $Kstart .. $Kstop ) {
9429                 $rLL->[$KK]->[_LEVEL_] += $dlevel;
9430             }
9431
9432             # Copy opening ci level to help break at = for -lp mode (case b1124)
9433             $rLL->[$Kinner_opening]->[_CI_LEVEL_] =
9434               $rLL->[$Kouter_opening]->[_CI_LEVEL_];
9435
9436             # But do not copy the closing ci level ... it can give poor results
9437             ## $rLL->[$Kinner_closing]->[_CI_LEVEL_] =
9438             ##  $rLL->[$Kouter_closing]->[_CI_LEVEL_];
9439         }
9440     }
9441
9442     return;
9443 } ## end sub weld_nested_containers
9444
9445 sub weld_nested_quotes {
9446
9447     # Called once per file for option '--weld-nested-containers'. This
9448     # does welding on qw quotes.
9449
9450     my $self = shift;
9451
9452     # See if quotes are excluded from welding
9453     my $rflags = $weld_nested_exclusion_rules{'q'};
9454     return if ( defined($rflags) && defined( $rflags->[1] ) );
9455
9456     my $rK_weld_left  = $self->[_rK_weld_left_];
9457     my $rK_weld_right = $self->[_rK_weld_right_];
9458
9459     my $rLL = $self->[_rLL_];
9460     return unless ( defined($rLL) && @{$rLL} );
9461     my $Num = @{$rLL};
9462
9463     my $K_opening_container = $self->[_K_opening_container_];
9464     my $K_closing_container = $self->[_K_closing_container_];
9465     my $rlines              = $self->[_rlines_];
9466
9467     my $starting_lentot;
9468     my $maximum_text_length;
9469
9470     my $is_single_quote = sub {
9471         my ( $Kbeg, $Kend, $quote_type ) = @_;
9472         foreach my $K ( $Kbeg .. $Kend ) {
9473             my $test_type = $rLL->[$K]->[_TYPE_];
9474             next   if ( $test_type eq 'b' );
9475             return if ( $test_type ne $quote_type );
9476         }
9477         return 1;
9478     };
9479
9480     # Length tolerance - same as previously used for sub weld_nested
9481     my $multiline_tol =
9482       1 + max( $rOpts_indent_columns, $rOpts_continuation_indentation );
9483
9484     # look for single qw quotes nested in containers
9485     my $KNEXT = $self->[_K_first_seq_item_];
9486     while ( defined($KNEXT) ) {
9487         my $KK = $KNEXT;
9488         $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
9489         my $rtoken_vars = $rLL->[$KK];
9490         my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
9491         if ( !$outer_seqno ) {
9492             next if ( $KK == 0 );    # first token in file may not be container
9493
9494             # A fault here implies that an error was made in the little loop at
9495             # the bottom of sub 'respace_tokens' which set the values of
9496             # _KNEXT_SEQ_ITEM_.  Or an error has been introduced in the
9497             # loop control lines above.
9498             Fault("sequence = $outer_seqno not defined at K=$KK")
9499               if (DEVEL_MODE);
9500             next;
9501         }
9502
9503         my $token = $rtoken_vars->[_TOKEN_];
9504         if ( $is_opening_token{$token} ) {
9505
9506             # see if the next token is a quote of some type
9507             my $Kn = $KK + 1;
9508             $Kn += 1
9509               if ( $Kn < $Num && $rLL->[$Kn]->[_TYPE_] eq 'b' );
9510             next unless ( $Kn < $Num );
9511
9512             my $next_token = $rLL->[$Kn]->[_TOKEN_];
9513             my $next_type  = $rLL->[$Kn]->[_TYPE_];
9514             next
9515               unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
9516                 && $next_token =~ /^q/ );
9517
9518             # The token before the closing container must also be a quote
9519             my $Kouter_closing = $K_closing_container->{$outer_seqno};
9520             my $Kinner_closing = $self->K_previous_nonblank($Kouter_closing);
9521             next unless $rLL->[$Kinner_closing]->[_TYPE_] eq $next_type;
9522
9523             # This is an inner opening container
9524             my $Kinner_opening = $Kn;
9525
9526             # Do not weld to single-line quotes. Nothing is gained, and it may
9527             # look bad.
9528             next if ( $Kinner_closing == $Kinner_opening );
9529
9530             # Only weld to quotes delimited with container tokens. This is
9531             # because welding to arbitrary quote delimiters can produce code
9532             # which is less readable than without welding.
9533             my $closing_delimiter =
9534               substr( $rLL->[$Kinner_closing]->[_TOKEN_], -1, 1 );
9535             next
9536               unless ( $is_closing_token{$closing_delimiter}
9537                 || $closing_delimiter eq '>' );
9538
9539             # Now make sure that there is just a single quote in the container
9540             next
9541               unless (
9542                 $is_single_quote->(
9543                     $Kinner_opening + 1,
9544                     $Kinner_closing - 1,
9545                     $next_type
9546                 )
9547               );
9548
9549             # OK: This is a candidate for welding
9550             my $Msg = EMPTY_STRING;
9551             my $do_not_weld;
9552
9553             my $Kouter_opening = $K_opening_container->{$outer_seqno};
9554             my $iline_oo       = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
9555             my $iline_io       = $rLL->[$Kinner_opening]->[_LINE_INDEX_];
9556             my $iline_oc       = $rLL->[$Kouter_closing]->[_LINE_INDEX_];
9557             my $iline_ic       = $rLL->[$Kinner_closing]->[_LINE_INDEX_];
9558             my $is_old_weld =
9559               ( $iline_oo == $iline_io && $iline_ic == $iline_oc );
9560
9561             # Fix for case b1189. If quote is marked as type 'Q' then only weld
9562             # if the two closing tokens are on the same input line.  Otherwise,
9563             # the closing line will be output earlier in the pipeline than
9564             # other CODE lines and welding will not actually occur. This will
9565             # leave a half-welded structure with potential formatting
9566             # instability.  This might be fixed by adding a check for a weld on
9567             # a closing Q token and sending it down the normal channel, but it
9568             # would complicate the code and is potentially risky.
9569             next
9570               if (!$is_old_weld
9571                 && $next_type eq 'Q'
9572                 && $iline_ic != $iline_oc );
9573
9574             # If welded, the line must not exceed allowed line length
9575             ( my $ok_to_weld, $maximum_text_length, $starting_lentot, my $msg )
9576               = $self->setup_new_weld_measurements( $Kouter_opening,
9577                 $Kinner_opening );
9578             if ( !$ok_to_weld ) {
9579                 if (DEBUG_WELD) { print $msg}
9580                 next;
9581             }
9582
9583             my $length =
9584               $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_] - $starting_lentot;
9585             my $excess = $length + $multiline_tol - $maximum_text_length;
9586
9587             my $excess_max = ( $is_old_weld ? $multiline_tol : 0 );
9588             if ( $excess >= $excess_max ) {
9589                 $do_not_weld = 1;
9590             }
9591
9592             if (DEBUG_WELD) {
9593                 if ( !$is_old_weld ) { $is_old_weld = EMPTY_STRING }
9594                 $Msg .=
9595 "excess=$excess>=$excess_max, multiline_tol=$multiline_tol, is_old_weld='$is_old_weld'\n";
9596             }
9597
9598             # Check weld exclusion rules for outer container
9599             if ( !$do_not_weld ) {
9600                 my $is_leading = !defined( $rK_weld_left->{$Kouter_opening} );
9601                 if ( $self->is_excluded_weld( $KK, $is_leading ) ) {
9602                     if (DEBUG_WELD) {
9603                         $Msg .=
9604 "No qw weld due to weld exclusion rules for outer container\n";
9605                     }
9606                     $do_not_weld = 1;
9607                 }
9608             }
9609
9610             # Check the length of the last line (fixes case b1039)
9611             if ( !$do_not_weld ) {
9612                 my $rK_range_ic = $rlines->[$iline_ic]->{_rK_range};
9613                 my ( $Kfirst_ic, $Klast_ic ) = @{$rK_range_ic};
9614                 my $excess_ic =
9615                   $self->excess_line_length_for_Krange( $Kfirst_ic,
9616                     $Kouter_closing );
9617
9618                 # Allow extra space for additional welded closing container(s)
9619                 # and a space and comma or semicolon.
9620                 # NOTE: weld len has not been computed yet. Use 2 spaces
9621                 # for now, correct for a single weld. This estimate could
9622                 # be made more accurate if necessary.
9623                 my $weld_len =
9624                   defined( $rK_weld_right->{$Kouter_closing} ) ? 2 : 0;
9625                 if ( $excess_ic + $weld_len + 2 > 0 ) {
9626                     if (DEBUG_WELD) {
9627                         $Msg .=
9628 "No qw weld due to excess ending line length=$excess_ic + $weld_len + 2 > 0\n";
9629                     }
9630                     $do_not_weld = 1;
9631                 }
9632             }
9633
9634             if ($do_not_weld) {
9635                 if (DEBUG_WELD) {
9636                     $Msg .= "Not Welding QW\n";
9637                     print $Msg;
9638                 }
9639                 next;
9640             }
9641
9642             # OK to weld
9643             if (DEBUG_WELD) {
9644                 $Msg .= "Welding QW\n";
9645                 print $Msg;
9646             }
9647
9648             $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
9649             $rK_weld_left->{$Kinner_opening}  = $Kouter_opening;
9650
9651             $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
9652             $rK_weld_left->{$Kouter_closing}  = $Kinner_closing;
9653
9654             # Undo one indentation level if an extra level was added to this
9655             # multiline quote
9656             my $qw_seqno =
9657               $self->[_rstarting_multiline_qw_seqno_by_K_]->{$Kinner_opening};
9658             if (   $qw_seqno
9659                 && $self->[_rmultiline_qw_has_extra_level_]->{$qw_seqno} )
9660             {
9661                 foreach my $K ( $Kinner_opening + 1 .. $Kinner_closing - 1 ) {
9662                     $rLL->[$K]->[_LEVEL_] -= 1;
9663                 }
9664                 $rLL->[$Kinner_opening]->[_CI_LEVEL_] = 0;
9665                 $rLL->[$Kinner_closing]->[_CI_LEVEL_] = 0;
9666             }
9667
9668             # undo CI for other welded quotes
9669             else {
9670
9671                 foreach my $K ( $Kinner_opening .. $Kinner_closing ) {
9672                     $rLL->[$K]->[_CI_LEVEL_] = 0;
9673                 }
9674             }
9675
9676             # Change the level of a closing qw token to be that of the outer
9677             # containing token. This will allow -lp indentation to function
9678             # correctly in the vertical aligner.
9679             # Patch to fix c002: but not if it contains text
9680             if ( length( $rLL->[$Kinner_closing]->[_TOKEN_] ) == 1 ) {
9681                 $rLL->[$Kinner_closing]->[_LEVEL_] =
9682                   $rLL->[$Kouter_closing]->[_LEVEL_];
9683             }
9684         }
9685     }
9686     return;
9687 } ## end sub weld_nested_quotes
9688
9689 sub is_welded_at_seqno {
9690
9691     my ( $self, $seqno ) = @_;
9692
9693     # given a sequence number:
9694     #   return true if it is welded either left or right
9695     #   return false otherwise
9696     return unless ( $total_weld_count && defined($seqno) );
9697     my $KK_o = $self->[_K_opening_container_]->{$seqno};
9698     return unless defined($KK_o);
9699     return defined( $self->[_rK_weld_left_]->{$KK_o} )
9700       || defined( $self->[_rK_weld_right_]->{$KK_o} );
9701 } ## end sub is_welded_at_seqno
9702
9703 sub mark_short_nested_blocks {
9704
9705     # This routine looks at the entire file and marks any short nested blocks
9706     # which should not be broken.  The results are stored in the hash
9707     #     $rshort_nested->{$type_sequence}
9708     # which will be true if the container should remain intact.
9709     #
9710     # For example, consider the following line:
9711
9712     #   sub cxt_two { sort { $a <=> $b } test_if_list() }
9713
9714     # The 'sort' block is short and nested within an outer sub block.
9715     # Normally, the existence of the 'sort' block will force the sub block to
9716     # break open, but this is not always desirable. Here we will set a flag for
9717     # the sort block to prevent this.  To give the user control, we will
9718     # follow the input file formatting.  If either of the blocks is broken in
9719     # the input file then we will allow it to remain broken. Otherwise we will
9720     # set a flag to keep it together in later formatting steps.
9721
9722     # The flag which is set here will be checked in two places:
9723     # 'sub process_line_of_CODE' and 'sub starting_one_line_block'
9724
9725     my $self = shift;
9726     return if $rOpts->{'indent-only'};
9727
9728     my $rLL = $self->[_rLL_];
9729     return unless ( defined($rLL) && @{$rLL} );
9730
9731     return unless ( $rOpts->{'one-line-block-nesting'} );
9732
9733     my $K_opening_container  = $self->[_K_opening_container_];
9734     my $K_closing_container  = $self->[_K_closing_container_];
9735     my $rbreak_container     = $self->[_rbreak_container_];
9736     my $rshort_nested        = $self->[_rshort_nested_];
9737     my $rlines               = $self->[_rlines_];
9738     my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
9739
9740     # Variables needed for estimating line lengths
9741     my $maximum_text_length;
9742     my $starting_lentot;
9743     my $length_tol = 1;
9744
9745     my $excess_length_to_K = sub {
9746         my ($K) = @_;
9747
9748         # Estimate the length from the line start to a given token
9749         my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
9750         my $excess_length = $length + $length_tol - $maximum_text_length;
9751         return ($excess_length);
9752     };
9753
9754     my $is_broken_block = sub {
9755
9756         # a block is broken if the input line numbers of the braces differ
9757         my ($seqno) = @_;
9758         my $K_opening = $K_opening_container->{$seqno};
9759         return unless ( defined($K_opening) );
9760         my $K_closing = $K_closing_container->{$seqno};
9761         return unless ( defined($K_closing) );
9762         return $rbreak_container->{$seqno}
9763           || $rLL->[$K_closing]->[_LINE_INDEX_] !=
9764           $rLL->[$K_opening]->[_LINE_INDEX_];
9765     };
9766
9767     # loop over all containers
9768     my @open_block_stack;
9769     my $iline = -1;
9770     my $KNEXT = $self->[_K_first_seq_item_];
9771     while ( defined($KNEXT) ) {
9772         my $KK = $KNEXT;
9773         $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
9774         my $rtoken_vars   = $rLL->[$KK];
9775         my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
9776         if ( !$type_sequence ) {
9777             next if ( $KK == 0 );    # first token in file may not be container
9778
9779             # A fault here implies that an error was made in the little loop at
9780             # the bottom of sub 'respace_tokens' which set the values of
9781             # _KNEXT_SEQ_ITEM_.  Or an error has been introduced in the
9782             # loop control lines above.
9783             Fault("sequence = $type_sequence not defined at K=$KK")
9784               if (DEVEL_MODE);
9785             next;
9786         }
9787
9788         # Patch: do not mark short blocks with welds.
9789         # In some cases blinkers can form (case b690).
9790         if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence) ) {
9791             next;
9792         }
9793
9794         # We are just looking at code blocks
9795         my $token = $rtoken_vars->[_TOKEN_];
9796         my $type  = $rtoken_vars->[_TYPE_];
9797         next unless ( $type eq $token );
9798         next unless ( $rblock_type_of_seqno->{$type_sequence} );
9799
9800         # Keep a stack of all acceptable block braces seen.
9801         # Only consider blocks entirely on one line so dump the stack when line
9802         # changes.
9803         my $iline_last = $iline;
9804         $iline = $rLL->[$KK]->[_LINE_INDEX_];
9805         if ( $iline != $iline_last ) { @open_block_stack = () }
9806
9807         if ( $token eq '}' ) {
9808             if (@open_block_stack) { pop @open_block_stack }
9809         }
9810         next unless ( $token eq '{' );
9811
9812         # block must be balanced (bad scripts may be unbalanced)
9813         my $K_opening = $K_opening_container->{$type_sequence};
9814         my $K_closing = $K_closing_container->{$type_sequence};
9815         next unless ( defined($K_opening) && defined($K_closing) );
9816
9817         # require that this block be entirely on one line
9818         next if ( $is_broken_block->($type_sequence) );
9819
9820         # See if this block fits on one line of allowed length (which may
9821         # be different from the input script)
9822         $starting_lentot =
9823           $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
9824         my $level    = $rLL->[$KK]->[_LEVEL_];
9825         my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
9826         $maximum_text_length =
9827           $maximum_text_length_at_level[$level] -
9828           $ci_level * $rOpts_continuation_indentation;
9829
9830         # Dump the stack if block is too long and skip this block
9831         if ( $excess_length_to_K->($K_closing) > 0 ) {
9832             @open_block_stack = ();
9833             next;
9834         }
9835
9836         # OK, Block passes tests, remember it
9837         push @open_block_stack, $type_sequence;
9838
9839         # We are only marking nested code blocks,
9840         # so check for a previous block on the stack
9841         next unless ( @open_block_stack > 1 );
9842
9843         # Looks OK, mark this as a short nested block
9844         $rshort_nested->{$type_sequence} = 1;
9845
9846     }
9847     return;
9848 } ## end sub mark_short_nested_blocks
9849
9850 sub adjust_indentation_levels {
9851
9852     my ($self) = @_;
9853
9854     # Called once per file to do special indentation adjustments.
9855     # These routines adjust levels either by changing _CI_LEVEL_ directly or
9856     # by setting modified levels in the array $self->[_radjusted_levels_].
9857
9858     # Initialize the adjusted levels. These will be the levels actually used
9859     # for computing indentation.
9860
9861     # NOTE: This routine is called after the weld routines, which may have
9862     # already adjusted _LEVEL_, so we are making adjustments on top of those
9863     # levels.  It would be much nicer to have the weld routines also use this
9864     # adjustment, but that gets complicated when we combine -gnu -wn and have
9865     # some welded quotes.
9866     my $Klimit           = $self->[_Klimit_];
9867     my $rLL              = $self->[_rLL_];
9868     my $radjusted_levels = $self->[_radjusted_levels_];
9869
9870     return unless ( defined($Klimit) );
9871
9872     foreach my $KK ( 0 .. $Klimit ) {
9873         $radjusted_levels->[$KK] = $rLL->[$KK]->[_LEVEL_];
9874     }
9875
9876     # First set adjusted levels for any non-indenting braces.
9877     $self->do_non_indenting_braces();
9878
9879     # Adjust breaks and indentation list containers
9880     $self->break_before_list_opening_containers();
9881
9882     # Set adjusted levels for the whitespace cycle option.
9883     $self->whitespace_cycle_adjustment();
9884
9885     $self->braces_left_setup();
9886
9887     # Adjust continuation indentation if -bli is set
9888     $self->bli_adjustment();
9889
9890     $self->extended_ci()
9891       if ($rOpts_extended_continuation_indentation);
9892
9893     # Now clip any adjusted levels to be non-negative
9894     $self->clip_adjusted_levels();
9895
9896     return;
9897 } ## end sub adjust_indentation_levels
9898
9899 sub clip_adjusted_levels {
9900
9901     # Replace any negative adjusted levels with zero.
9902     # Negative levels can occur in files with brace errors.
9903     my ($self) = @_;
9904     my $radjusted_levels = $self->[_radjusted_levels_];
9905     return unless defined($radjusted_levels) && @{$radjusted_levels};
9906     foreach ( @{$radjusted_levels} ) { $_ = 0 if ( $_ < 0 ) }
9907     return;
9908 } ## end sub clip_adjusted_levels
9909
9910 sub do_non_indenting_braces {
9911
9912     # Called once per file to handle the --non-indenting-braces parameter.
9913     # Remove indentation within marked braces if requested
9914     my ($self) = @_;
9915
9916     # Any non-indenting braces have been found by sub find_non_indenting_braces
9917     # and are defined by the following hash:
9918     my $rseqno_non_indenting_brace_by_ix =
9919       $self->[_rseqno_non_indenting_brace_by_ix_];
9920     return unless ( %{$rseqno_non_indenting_brace_by_ix} );
9921
9922     my $rLL                        = $self->[_rLL_];
9923     my $rlines                     = $self->[_rlines_];
9924     my $K_opening_container        = $self->[_K_opening_container_];
9925     my $K_closing_container        = $self->[_K_closing_container_];
9926     my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
9927     my $radjusted_levels           = $self->[_radjusted_levels_];
9928
9929     # First locate all of the marked blocks
9930     my @K_stack;
9931     foreach my $ix ( keys %{$rseqno_non_indenting_brace_by_ix} ) {
9932         my $seqno          = $rseqno_non_indenting_brace_by_ix->{$ix};
9933         my $KK             = $K_opening_container->{$seqno};
9934         my $line_of_tokens = $rlines->[$ix];
9935         my $rK_range       = $line_of_tokens->{_rK_range};
9936         my ( $Kfirst, $Klast ) = @{$rK_range};
9937         $rspecial_side_comment_type->{$Klast} = 'NIB';
9938         push @K_stack, [ $KK, 1 ];
9939         my $Kc = $K_closing_container->{$seqno};
9940         push @K_stack, [ $Kc, -1 ] if ( defined($Kc) );
9941     }
9942     return unless (@K_stack);
9943     @K_stack = sort { $a->[0] <=> $b->[0] } @K_stack;
9944
9945     # Then loop to remove indentation within marked blocks
9946     my $KK_last = 0;
9947     my $ndeep   = 0;
9948     foreach my $item (@K_stack) {
9949         my ( $KK, $inc ) = @{$item};
9950         if ( $ndeep > 0 ) {
9951
9952             foreach ( $KK_last + 1 .. $KK ) {
9953                 $radjusted_levels->[$_] -= $ndeep;
9954             }
9955
9956             # We just subtracted the old $ndeep value, which only applies to a
9957             # '{'.  The new $ndeep applies to a '}', so we undo the error.
9958             if ( $inc < 0 ) { $radjusted_levels->[$KK] += 1 }
9959         }
9960
9961         $ndeep += $inc;
9962         $KK_last = $KK;
9963     }
9964     return;
9965 } ## end sub do_non_indenting_braces
9966
9967 sub whitespace_cycle_adjustment {
9968
9969     my $self = shift;
9970
9971     # Called once per file to implement the --whitespace-cycle option
9972     my $rLL = $self->[_rLL_];
9973     return unless ( defined($rLL) && @{$rLL} );
9974     my $radjusted_levels = $self->[_radjusted_levels_];
9975     my $maximum_level    = $self->[_maximum_level_];
9976
9977     if (   $rOpts_whitespace_cycle
9978         && $rOpts_whitespace_cycle > 0
9979         && $rOpts_whitespace_cycle < $maximum_level )
9980     {
9981
9982         my $Kmax = @{$rLL} - 1;
9983
9984         my $whitespace_last_level  = -1;
9985         my @whitespace_level_stack = ();
9986         my $last_nonblank_type     = 'b';
9987         my $last_nonblank_token    = EMPTY_STRING;
9988         foreach my $KK ( 0 .. $Kmax ) {
9989             my $level_abs = $radjusted_levels->[$KK];
9990             my $level     = $level_abs;
9991             if ( $level_abs < $whitespace_last_level ) {
9992                 pop(@whitespace_level_stack);
9993             }
9994             if ( !@whitespace_level_stack ) {
9995                 push @whitespace_level_stack, $level_abs;
9996             }
9997             elsif ( $level_abs > $whitespace_last_level ) {
9998                 $level = $whitespace_level_stack[-1] +
9999                   ( $level_abs - $whitespace_last_level );
10000
10001                 if (
10002                     # 1 Try to break at a block brace
10003                     (
10004                            $level > $rOpts_whitespace_cycle
10005                         && $last_nonblank_type eq '{'
10006                         && $last_nonblank_token eq '{'
10007                     )
10008
10009                     # 2 Then either a brace or bracket
10010                     || (   $level > $rOpts_whitespace_cycle + 1
10011                         && $last_nonblank_token =~ /^[\{\[]$/ )
10012
10013                     # 3 Then a paren too
10014                     || $level > $rOpts_whitespace_cycle + 2
10015                   )
10016                 {
10017                     $level = 1;
10018                 }
10019                 push @whitespace_level_stack, $level;
10020             }
10021             $level = $whitespace_level_stack[-1];
10022             $radjusted_levels->[$KK] = $level;
10023
10024             $whitespace_last_level = $level_abs;
10025             my $type  = $rLL->[$KK]->[_TYPE_];
10026             my $token = $rLL->[$KK]->[_TOKEN_];
10027             if ( $type ne 'b' ) {
10028                 $last_nonblank_type  = $type;
10029                 $last_nonblank_token = $token;
10030             }
10031         }
10032     }
10033     return;
10034 } ## end sub whitespace_cycle_adjustment
10035
10036 use constant DEBUG_BBX => 0;
10037
10038 sub break_before_list_opening_containers {
10039
10040     my ($self) = @_;
10041
10042     # This routine is called once per batch to implement parameters
10043     # --break-before-hash-brace=n and similar -bbx=n flags
10044     #    and their associated indentation flags:
10045     # --break-before-hash-brace-and-indent and similar -bbxi=n
10046
10047     # Nothing to do if none of the -bbx=n parameters has been set
10048     return unless %break_before_container_types;
10049
10050     my $rLL = $self->[_rLL_];
10051     return unless ( defined($rLL) && @{$rLL} );
10052
10053     # Loop over all opening container tokens
10054     my $K_opening_container       = $self->[_K_opening_container_];
10055     my $K_closing_container       = $self->[_K_closing_container_];
10056     my $ris_broken_container      = $self->[_ris_broken_container_];
10057     my $ris_permanently_broken    = $self->[_ris_permanently_broken_];
10058     my $rhas_list                 = $self->[_rhas_list_];
10059     my $rhas_broken_list          = $self->[_rhas_broken_list_];
10060     my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
10061     my $radjusted_levels          = $self->[_radjusted_levels_];
10062     my $rparent_of_seqno          = $self->[_rparent_of_seqno_];
10063     my $rlines                    = $self->[_rlines_];
10064     my $rtype_count_by_seqno      = $self->[_rtype_count_by_seqno_];
10065     my $rlec_count_by_seqno       = $self->[_rlec_count_by_seqno_];
10066     my $rno_xci_by_seqno          = $self->[_rno_xci_by_seqno_];
10067     my $rK_weld_right             = $self->[_rK_weld_right_];
10068     my $rblock_type_of_seqno      = $self->[_rblock_type_of_seqno_];
10069
10070     my $length_tol =
10071       max( 1, $rOpts_continuation_indentation, $rOpts_indent_columns );
10072     if ($rOpts_ignore_old_breakpoints) {
10073
10074         # Patch suggested by b1231; the old tol was excessive.
10075         ## $length_tol += $rOpts_maximum_line_length;
10076         $length_tol *= 2;
10077     }
10078
10079     my $rbreak_before_container_by_seqno = {};
10080     my $rwant_reduced_ci                 = {};
10081     foreach my $seqno ( keys %{$K_opening_container} ) {
10082
10083         #----------------------------------------------------------------
10084         # Part 1: Examine any -bbx=n flags
10085         #----------------------------------------------------------------
10086
10087         next if ( $rblock_type_of_seqno->{$seqno} );
10088         my $KK = $K_opening_container->{$seqno};
10089
10090         # This must be a list or contain a list.
10091         # Note1: switched from 'has_broken_list' to 'has_list' to fix b1024.
10092         # Note2: 'has_list' holds the depth to the sub-list.  We will require
10093         #  a depth of just 1
10094         my $is_list  = $self->is_list_by_seqno($seqno);
10095         my $has_list = $rhas_list->{$seqno};
10096
10097         # Fix for b1173: if welded opening container, use flag of innermost
10098         # seqno.  Otherwise, the restriction $has_list==1 prevents triple and
10099         # higher welds from following the -BBX parameters.
10100         if ($total_weld_count) {
10101             my $KK_test = $rK_weld_right->{$KK};
10102             if ( defined($KK_test) ) {
10103                 my $seqno_inner = $rLL->[$KK_test]->[_TYPE_SEQUENCE_];
10104                 $is_list ||= $self->is_list_by_seqno($seqno_inner);
10105                 $has_list = $rhas_list->{$seqno_inner};
10106             }
10107         }
10108
10109         next unless ( $is_list || $has_list && $has_list == 1 );
10110
10111         my $has_broken_list   = $rhas_broken_list->{$seqno};
10112         my $has_list_with_lec = $rhas_broken_list_with_lec->{$seqno};
10113
10114         # Only for types of container tokens with a non-default break option
10115         my $token        = $rLL->[$KK]->[_TOKEN_];
10116         my $break_option = $break_before_container_types{$token};
10117         next unless ($break_option);
10118
10119         # Do not use -bbx under stress for stability ... fixes b1300
10120         my $level = $rLL->[$KK]->[_LEVEL_];
10121         if ( $level >= $stress_level_beta ) {
10122             DEBUG_BBX
10123               && print
10124 "BBX: Switching off at $seqno: level=$level exceeds beta stress level=$stress_level_beta\n";
10125             next;
10126         }
10127
10128         # Require previous nonblank to be '=' or '=>'
10129         my $Kprev = $KK - 1;
10130         next if ( $Kprev < 0 );
10131         my $prev_type = $rLL->[$Kprev]->[_TYPE_];
10132         if ( $prev_type eq 'b' ) {
10133             $Kprev--;
10134             next if ( $Kprev < 0 );
10135             $prev_type = $rLL->[$Kprev]->[_TYPE_];
10136         }
10137         next unless ( $is_equal_or_fat_comma{$prev_type} );
10138
10139         my $ci = $rLL->[$KK]->[_CI_LEVEL_];
10140
10141         #--------------------------------------------
10142         # New coding for option 2 (break if complex).
10143         #--------------------------------------------
10144         # This new coding uses clues which are invariant under formatting to
10145         # decide if a list is complex.  For now it is only applied when -lp
10146         # and -vmll are used, but eventually it may become the standard method.
10147         # Fixes b1274, b1275, and others, including b1099.
10148         if ( $break_option == 2 ) {
10149
10150             if (   $rOpts_line_up_parentheses
10151                 || $rOpts_variable_maximum_line_length )
10152             {
10153
10154                 # Start with the basic definition of a complex list...
10155                 my $is_complex = $is_list && $has_list;
10156
10157                 # and it is also complex if the parent is a list
10158                 if ( !$is_complex ) {
10159                     my $parent = $rparent_of_seqno->{$seqno};
10160                     if ( $self->is_list_by_seqno($parent) ) {
10161                         $is_complex = 1;
10162                     }
10163                 }
10164
10165                 # finally, we will call it complex if there are inner opening
10166                 # and closing container tokens, not parens, within the outer
10167                 # container tokens.
10168                 if ( !$is_complex ) {
10169                     my $Kp      = $self->K_next_nonblank($KK);
10170                     my $token_p = defined($Kp) ? $rLL->[$Kp]->[_TOKEN_] : 'b';
10171                     if ( $is_opening_token{$token_p} && $token_p ne '(' ) {
10172
10173                         my $Kc = $K_closing_container->{$seqno};
10174                         my $Km = $self->K_previous_nonblank($Kc);
10175                         my $token_m =
10176                           defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
10177
10178                         # ignore any optional ending comma
10179                         if ( $token_m eq ',' ) {
10180                             $Km = $self->K_previous_nonblank($Km);
10181                             $token_m =
10182                               defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
10183                         }
10184
10185                         $is_complex ||=
10186                           $is_closing_token{$token_m} && $token_m ne ')';
10187                     }
10188                 }
10189
10190                 # Convert to option 3 (always break) if complex
10191                 next unless ($is_complex);
10192                 $break_option = 3;
10193             }
10194         }
10195
10196         # Fix for b1231: the has_list_with_lec does not cover all cases.
10197         # A broken container containing a list and with line-ending commas
10198         # will stay broken, so can be treated as if it had a list with lec.
10199         $has_list_with_lec ||=
10200              $has_list
10201           && $ris_broken_container->{$seqno}
10202           && $rlec_count_by_seqno->{$seqno};
10203
10204         DEBUG_BBX
10205           && print STDOUT
10206 "BBX: Looking at seqno=$seqno, token = $token with option=$break_option\n";
10207
10208         # -bbx=1 = stable, try to follow input
10209         if ( $break_option == 1 ) {
10210
10211             my $iline    = $rLL->[$KK]->[_LINE_INDEX_];
10212             my $rK_range = $rlines->[$iline]->{_rK_range};
10213             my ( $Kfirst, $Klast ) = @{$rK_range};
10214             next unless ( $KK == $Kfirst );
10215         }
10216
10217         # -bbx=2 => apply this style only for a 'complex' list
10218         elsif ( $break_option == 2 ) {
10219
10220             #  break if this list contains a broken list with line-ending comma
10221             my $ok_to_break;
10222             my $Msg = EMPTY_STRING;
10223             if ($has_list_with_lec) {
10224                 $ok_to_break = 1;
10225                 DEBUG_BBX && do { $Msg = "has list with lec;" };
10226             }
10227
10228             if ( !$ok_to_break ) {
10229
10230                 # Turn off -xci if -bbx=2 and this container has a sublist but
10231                 # not a broken sublist. This avoids creating blinkers.  The
10232                 # problem is that -xci can cause one-line lists to break open,
10233                 # and thereby creating formatting instability.
10234                 # This fixes cases b1033 b1036 b1037 b1038 b1042 b1043 b1044
10235                 # b1045 b1046 b1047 b1051 b1052 b1061.
10236                 if ($has_list) { $rno_xci_by_seqno->{$seqno} = 1 }
10237
10238                 my $parent = $rparent_of_seqno->{$seqno};
10239                 if ( $self->is_list_by_seqno($parent) ) {
10240                     DEBUG_BBX && do { $Msg = "parent is list" };
10241                     $ok_to_break = 1;
10242                 }
10243             }
10244
10245             if ( !$ok_to_break ) {
10246                 DEBUG_BBX
10247                   && print STDOUT "Not breaking at seqno=$seqno: $Msg\n";
10248                 next;
10249             }
10250
10251             DEBUG_BBX
10252               && print STDOUT "OK to break at seqno=$seqno: $Msg\n";
10253
10254             # Patch: turn off -xci if -bbx=2 and -lp
10255             # This fixes cases b1090 b1095 b1101 b1116 b1118 b1121 b1122
10256             $rno_xci_by_seqno->{$seqno} = 1 if ($rOpts_line_up_parentheses);
10257         }
10258
10259         # -bbx=3 = always break
10260         elsif ( $break_option == 3 ) {
10261
10262             # ok to break
10263         }
10264
10265         # Shouldn't happen! Bad flag, but make behavior same as 3
10266         else {
10267             # ok to break
10268         }
10269
10270         # Set a flag for actual implementation later in
10271         # sub insert_breaks_before_list_opening_containers
10272         $rbreak_before_container_by_seqno->{$seqno} = 1;
10273         DEBUG_BBX
10274           && print STDOUT "BBX: ok to break at seqno=$seqno\n";
10275
10276         # -bbxi=0: Nothing more to do if the ci value remains unchanged
10277         my $ci_flag = $container_indentation_options{$token};
10278         next unless ($ci_flag);
10279
10280         # -bbxi=1: This option removes ci and is handled in
10281         # later sub final_indentation_adjustment
10282         if ( $ci_flag == 1 ) {
10283             $rwant_reduced_ci->{$seqno} = 1;
10284             next;
10285         }
10286
10287         # -bbxi=2: This option changes the level ...
10288         # This option can conflict with -xci in some cases.  We can turn off
10289         # -xci for this container to avoid blinking.  For now, only do this if
10290         # -vmll is set.  ( fixes b1335, b1336 )
10291         if ($rOpts_variable_maximum_line_length) {
10292             $rno_xci_by_seqno->{$seqno} = 1;
10293         }
10294
10295         #----------------------------------------------------------------
10296         # Part 2: Perform tests before committing to changing ci and level
10297         #----------------------------------------------------------------
10298
10299         # Before changing the ci level of the opening container, we need
10300         # to be sure that the container will be broken in the later stages of
10301         # formatting.  We have to do this because we are working early in the
10302         # formatting pipeline.  A problem can occur if we change the ci or
10303         # level of the opening token but do not actually break the container
10304         # open as expected.  In most cases it wouldn't make any difference if
10305         # we changed ci or not, but there are some edge cases where this
10306         # can cause blinking states, so we need to try to only change ci if
10307         # the container will really be broken.
10308
10309         # Only consider containers already broken
10310         next if ( !$ris_broken_container->{$seqno} );
10311
10312         # Patch to fix issue b1305: the combination of -naws and ci>i appears
10313         # to cause an instability.  It should almost never occur in practice.
10314         next
10315           if (!$rOpts_add_whitespace
10316             && $rOpts_continuation_indentation > $rOpts_indent_columns );
10317
10318         # Always ok to change ci for permanently broken containers
10319         if ( $ris_permanently_broken->{$seqno} ) {
10320             goto OK;
10321         }
10322
10323         # Always OK if this list contains a broken sub-container with
10324         # a non-terminal line-ending comma
10325         if ($has_list_with_lec) { goto OK }
10326
10327         # From here on we are considering a single container...
10328
10329         # A single container must have at least 1 line-ending comma:
10330         next unless ( $rlec_count_by_seqno->{$seqno} );
10331
10332         # Since it has a line-ending comma, it will stay broken if the -boc
10333         # flag is set
10334         if ($rOpts_break_at_old_comma_breakpoints) { goto OK }
10335
10336         # OK if the container contains multiple fat commas
10337         # Better: multiple lines with fat commas
10338         if ( !$rOpts_ignore_old_breakpoints ) {
10339             my $rtype_count = $rtype_count_by_seqno->{$seqno};
10340             next unless ($rtype_count);
10341             my $fat_comma_count = $rtype_count->{'=>'};
10342             DEBUG_BBX
10343               && print STDOUT "BBX: fat comma count=$fat_comma_count\n";
10344             if ( $fat_comma_count && $fat_comma_count >= 2 ) { goto OK }
10345         }
10346
10347         # The last check we can make is to see if this container could fit on a
10348         # single line.  Use the least possible indentation estimate, ci=0,
10349         # so we are not subtracting $ci * $rOpts_continuation_indentation from
10350         # tabulated $maximum_text_length  value.
10351         my $maximum_text_length = $maximum_text_length_at_level[$level];
10352         my $K_closing           = $K_closing_container->{$seqno};
10353         my $length = $self->cumulative_length_before_K($K_closing) -
10354           $self->cumulative_length_before_K($KK);
10355         my $excess_length = $length - $maximum_text_length;
10356         DEBUG_BBX
10357           && print STDOUT
10358 "BBX: excess=$excess_length: maximum_text_length=$maximum_text_length, length=$length, ci=$ci\n";
10359
10360         # OK if the net container definitely breaks on length
10361         if ( $excess_length > $length_tol ) {
10362             DEBUG_BBX
10363               && print STDOUT "BBX: excess_length=$excess_length\n";
10364             goto OK;
10365         }
10366
10367         # Otherwise skip it
10368         next;
10369
10370         #################################################################
10371         # Part 3: Looks OK: apply -bbx=n and any related -bbxi=n flag
10372         #################################################################
10373
10374       OK:
10375
10376         DEBUG_BBX && print STDOUT "BBX: OK to break\n";
10377
10378         # -bbhbi=n
10379         # -bbsbi=n
10380         # -bbpi=n
10381
10382         # where:
10383
10384         # n=0  default indentation (usually one ci)
10385         # n=1  outdent one ci
10386         # n=2  indent one level (minus one ci)
10387         # n=3  indent one extra ci [This may be dropped]
10388
10389         # NOTE: We are adjusting indentation of the opening container. The
10390         # closing container will normally follow the indentation of the opening
10391         # container automatically, so this is not currently done.
10392         next unless ($ci);
10393
10394         # option 1: outdent
10395         if ( $ci_flag == 1 ) {
10396             $ci -= 1;
10397         }
10398
10399         # option 2: indent one level
10400         elsif ( $ci_flag == 2 ) {
10401             $ci -= 1;
10402             $radjusted_levels->[$KK] += 1;
10403         }
10404
10405         # unknown option
10406         else {
10407             # Shouldn't happen - leave ci unchanged
10408         }
10409
10410         $rLL->[$KK]->[_CI_LEVEL_] = $ci if ( $ci >= 0 );
10411     }
10412
10413     $self->[_rbreak_before_container_by_seqno_] =
10414       $rbreak_before_container_by_seqno;
10415     $self->[_rwant_reduced_ci_] = $rwant_reduced_ci;
10416     return;
10417 } ## end sub break_before_list_opening_containers
10418
10419 use constant DEBUG_XCI => 0;
10420
10421 sub extended_ci {
10422
10423     # This routine implements the -xci (--extended-continuation-indentation)
10424     # flag.  We add CI to interior tokens of a container which itself has CI but
10425     # only if a token does not already have CI.
10426
10427     # To do this, we will locate opening tokens which themselves have
10428     # continuation indentation (CI).  We track them with their sequence
10429     # numbers.  These sequence numbers are called 'controlling sequence
10430     # numbers'.  They apply continuation indentation to the tokens that they
10431     # contain.  These inner tokens remember their controlling sequence numbers.
10432     # Later, when these inner tokens are output, they have to see if the output
10433     # lines with their controlling tokens were output with CI or not.  If not,
10434     # then they must remove their CI too.
10435
10436     # The controlling CI concept works hierarchically.  But CI itself is not
10437     # hierarchical; it is either on or off. There are some rare instances where
10438     # it would be best to have hierarchical CI too, but not enough to be worth
10439     # the programming effort.
10440
10441     # The operations to remove unwanted CI are done in sub 'undo_ci'.
10442
10443     my ($self) = @_;
10444
10445     my $rLL = $self->[_rLL_];
10446     return unless ( defined($rLL) && @{$rLL} );
10447
10448     my $ris_list_by_seqno        = $self->[_ris_list_by_seqno_];
10449     my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
10450     my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
10451     my $rlines                   = $self->[_rlines_];
10452     my $rno_xci_by_seqno         = $self->[_rno_xci_by_seqno_];
10453     my $ris_bli_container        = $self->[_ris_bli_container_];
10454     my $rblock_type_of_seqno     = $self->[_rblock_type_of_seqno_];
10455
10456     my %available_space;
10457
10458     # Loop over all opening container tokens
10459     my $K_opening_container  = $self->[_K_opening_container_];
10460     my $K_closing_container  = $self->[_K_closing_container_];
10461     my $ris_broken_container = $self->[_ris_broken_container_];
10462     my @seqno_stack;
10463     my $seqno_top;
10464     my $KLAST;
10465     my $KNEXT = $self->[_K_first_seq_item_];
10466
10467     # The following variable can be used to allow a little extra space to
10468     # avoid blinkers.  A value $len_tol = 20 fixed the following
10469     # fixes cases: b1025 b1026 b1027 b1028 b1029 b1030 but NOT b1031.
10470     # It turned out that the real problem was mis-parsing a list brace as
10471     # a code block in a 'use' statement when the line length was extremely
10472     # small.  A value of 0 works now, but a slightly larger value can
10473     # be used to minimize the chance of a blinker.
10474     my $len_tol = 0;
10475
10476     while ( defined($KNEXT) ) {
10477
10478         # Fix all tokens up to the next sequence item if we are changing CI
10479         if ($seqno_top) {
10480
10481             my $is_list = $ris_list_by_seqno->{$seqno_top};
10482             my $space   = $available_space{$seqno_top};
10483             my $length  = $rLL->[$KLAST]->[_CUMULATIVE_LENGTH_];
10484             my $count   = 0;
10485             foreach my $Kt ( $KLAST + 1 .. $KNEXT - 1 ) {
10486
10487                 # But do not include tokens which might exceed the line length
10488                 # and are not in a list.
10489                 # ... This fixes case b1031
10490                 my $length_before = $length;
10491                 $length = $rLL->[$Kt]->[_CUMULATIVE_LENGTH_];
10492                 if (
10493                     !$rLL->[$Kt]->[_CI_LEVEL_]
10494                     && (   $is_list
10495                         || $length - $length_before < $space
10496                         || $rLL->[$Kt]->[_TYPE_] eq '#' )
10497                   )
10498                 {
10499                     $rLL->[$Kt]->[_CI_LEVEL_] = 1;
10500                     $rseqno_controlling_my_ci->{$Kt} = $seqno_top;
10501                     $count++;
10502                 }
10503             }
10504             $ris_seqno_controlling_ci->{$seqno_top} += $count;
10505         }
10506
10507         $KLAST = $KNEXT;
10508         my $KK = $KNEXT;
10509         $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
10510
10511         my $seqno     = $rLL->[$KK]->[_TYPE_SEQUENCE_];
10512         my $K_opening = $K_opening_container->{$seqno};
10513
10514         # see if we have reached the end of the current controlling container
10515         if ( $seqno_top && $seqno == $seqno_top ) {
10516             $seqno_top = pop @seqno_stack;
10517         }
10518
10519         # Patch to fix some block types...
10520         # Certain block types arrive from the tokenizer without CI but should
10521         # have it for this option.  These include anonymous subs and
10522         #     do sort map grep eval
10523         my $block_type = $rblock_type_of_seqno->{$seqno};
10524         if ( $block_type && $is_block_with_ci{$block_type} ) {
10525             $rLL->[$KK]->[_CI_LEVEL_] = 1;
10526             if ($seqno_top) {
10527                 $rseqno_controlling_my_ci->{$KK} = $seqno_top;
10528                 $ris_seqno_controlling_ci->{$seqno_top}++;
10529             }
10530         }
10531
10532         # If this does not have ci, update ci if necessary and continue looking
10533         if ( !$rLL->[$KK]->[_CI_LEVEL_] ) {
10534             if ($seqno_top) {
10535                 $rLL->[$KK]->[_CI_LEVEL_] = 1;
10536                 $rseqno_controlling_my_ci->{$KK} = $seqno_top;
10537                 $ris_seqno_controlling_ci->{$seqno_top}++;
10538             }
10539             next;
10540         }
10541
10542         # Skip if requested by -bbx to avoid blinkers
10543         if ( $rno_xci_by_seqno->{$seqno} ) {
10544             next;
10545         }
10546
10547         # Skip if this is a -bli container (this fixes case b1065) Note: case
10548         # b1065 is also fixed by the update for b1055, so this update is not
10549         # essential now.  But there does not seem to be a good reason to add
10550         # xci and bli together, so the update is retained.
10551         if ( $ris_bli_container->{$seqno} ) {
10552             next;
10553         }
10554
10555         # We are looking for opening container tokens with ci
10556         next unless ( defined($K_opening) && $KK == $K_opening );
10557
10558         # Make sure there is a corresponding closing container
10559         # (could be missing if the script has a brace error)
10560         my $K_closing = $K_closing_container->{$seqno};
10561         next unless defined($K_closing);
10562
10563         # Require different input lines. This will filter out a large number
10564         # of small hash braces and array brackets.  If we accidentally filter
10565         # out an important container, it will get fixed on the next pass.
10566         if (
10567             $rLL->[$K_opening]->[_LINE_INDEX_] ==
10568             $rLL->[$K_closing]->[_LINE_INDEX_]
10569             && ( $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
10570                 $rLL->[$K_opening]->[_CUMULATIVE_LENGTH_] >
10571                 $rOpts_maximum_line_length )
10572           )
10573         {
10574             DEBUG_XCI
10575               && print "XCI: Skipping seqno=$seqno, require different lines\n";
10576             next;
10577         }
10578
10579         # Do not apply -xci if adding extra ci will put the container contents
10580         # beyond the line length limit (fixes cases b899 b935)
10581         my $level    = $rLL->[$K_opening]->[_LEVEL_];
10582         my $ci_level = $rLL->[$K_opening]->[_CI_LEVEL_];
10583         my $maximum_text_length =
10584           $maximum_text_length_at_level[$level] -
10585           $ci_level * $rOpts_continuation_indentation;
10586
10587         # Fix for b1197 b1198 b1199 b1200 b1201 b1202
10588         # Do not apply -xci if we are running out of space
10589         if ( $level >= $stress_level_beta ) {
10590             DEBUG_XCI
10591               && print
10592 "XCI: Skipping seqno=$seqno, level=$level exceeds stress level=$stress_level_beta\n";
10593             next;
10594         }
10595
10596         # remember how much space is available for patch b1031 above
10597         my $space =
10598           $maximum_text_length - $len_tol - $rOpts_continuation_indentation;
10599
10600         if ( $space < 0 ) {
10601             DEBUG_XCI && print "XCI: Skipping seqno=$seqno, space=$space\n";
10602             next;
10603         }
10604         DEBUG_XCI && print "XCI: OK seqno=$seqno, space=$space\n";
10605
10606         $available_space{$seqno} = $space;
10607
10608         # This becomes the next controlling container
10609         push @seqno_stack, $seqno_top if ($seqno_top);
10610         $seqno_top = $seqno;
10611     }
10612     return;
10613 } ## end sub extended_ci
10614
10615 sub braces_left_setup {
10616
10617     # Called once per file to mark all -bl, -sbl, and -asbl containers
10618     my $self = shift;
10619
10620     my $rOpts_bl   = $rOpts->{'opening-brace-on-new-line'};
10621     my $rOpts_sbl  = $rOpts->{'opening-sub-brace-on-new-line'};
10622     my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
10623     return unless ( $rOpts_bl || $rOpts_sbl || $rOpts_asbl );
10624
10625     my $rLL = $self->[_rLL_];
10626     return unless ( defined($rLL) && @{$rLL} );
10627
10628     # We will turn on this hash for braces controlled by these flags:
10629     my $rbrace_left = $self->[_rbrace_left_];
10630
10631     my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
10632     my $ris_asub_block       = $self->[_ris_asub_block_];
10633     my $ris_sub_block        = $self->[_ris_sub_block_];
10634     foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
10635
10636         my $block_type = $rblock_type_of_seqno->{$seqno};
10637
10638         # use -asbl flag for an anonymous sub block
10639         if ( $ris_asub_block->{$seqno} ) {
10640             if ($rOpts_asbl) {
10641                 $rbrace_left->{$seqno} = 1;
10642             }
10643         }
10644
10645         # use -sbl flag for a named sub
10646         elsif ( $ris_sub_block->{$seqno} ) {
10647             if ($rOpts_sbl) {
10648                 $rbrace_left->{$seqno} = 1;
10649             }
10650         }
10651
10652         # use -bl flag if not a sub block of any type
10653         else {
10654             if (   $rOpts_bl
10655                 && $block_type =~ /$bl_pattern/
10656                 && $block_type !~ /$bl_exclusion_pattern/ )
10657             {
10658                 $rbrace_left->{$seqno} = 1;
10659             }
10660         }
10661     }
10662     return;
10663 } ## end sub braces_left_setup
10664
10665 sub bli_adjustment {
10666
10667     # Called once per file to implement the --brace-left-and-indent option.
10668     # If -bli is set, adds one continuation indentation for certain braces
10669     my $self = shift;
10670     return unless ( $rOpts->{'brace-left-and-indent'} );
10671     my $rLL = $self->[_rLL_];
10672     return unless ( defined($rLL) && @{$rLL} );
10673
10674     my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
10675     my $ris_bli_container    = $self->[_ris_bli_container_];
10676     my $rbrace_left          = $self->[_rbrace_left_];
10677     my $K_opening_container  = $self->[_K_opening_container_];
10678     my $K_closing_container  = $self->[_K_closing_container_];
10679
10680     foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
10681         my $block_type = $rblock_type_of_seqno->{$seqno};
10682         if (   $block_type
10683             && $block_type =~ /$bli_pattern/
10684             && $block_type !~ /$bli_exclusion_pattern/ )
10685         {
10686             $ris_bli_container->{$seqno} = 1;
10687             $rbrace_left->{$seqno}       = 1;
10688             my $Ko = $K_opening_container->{$seqno};
10689             my $Kc = $K_closing_container->{$seqno};
10690             if ( defined($Ko) && defined($Kc) ) {
10691                 $rLL->[$Kc]->[_CI_LEVEL_] = ++$rLL->[$Ko]->[_CI_LEVEL_];
10692             }
10693         }
10694     }
10695     return;
10696 } ## end sub bli_adjustment
10697
10698 sub find_multiline_qw {
10699
10700     my $self = shift;
10701
10702     # Multiline qw quotes are not sequenced items like containers { [ (
10703     # but behave in some respects in a similar way. So this routine finds them
10704     # and creates a separate sequence number system for later use.
10705
10706     # This is straightforward because they always begin at the end of one line
10707     # and and at the beginning of a later line. This is true no matter how we
10708     # finally make our line breaks, so we can find them before deciding on new
10709     # line breaks.
10710
10711     my $rstarting_multiline_qw_seqno_by_K = {};
10712     my $rending_multiline_qw_seqno_by_K   = {};
10713     my $rKrange_multiline_qw_by_seqno     = {};
10714     my $rmultiline_qw_has_extra_level     = {};
10715
10716     my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
10717
10718     my $rlines = $self->[_rlines_];
10719     my $rLL    = $self->[_rLL_];
10720     my $qw_seqno;
10721     my $num_qw_seqno = 0;
10722     my $K_start_multiline_qw;
10723
10724     foreach my $line_of_tokens ( @{$rlines} ) {
10725
10726         my $line_type = $line_of_tokens->{_line_type};
10727         next unless ( $line_type eq 'CODE' );
10728         my $rK_range = $line_of_tokens->{_rK_range};
10729         my ( $Kfirst, $Klast ) = @{$rK_range};
10730         next unless ( defined($Kfirst) && defined($Klast) );   # skip blank line
10731         if ( defined($K_start_multiline_qw) ) {
10732             my $type = $rLL->[$Kfirst]->[_TYPE_];
10733
10734             # shouldn't happen
10735             if ( $type ne 'q' ) {
10736                 DEVEL_MODE && print STDERR <<EOM;
10737 STRANGE: started multiline qw at K=$K_start_multiline_qw but didn't see q qw at K=$Kfirst\n";
10738 EOM
10739                 $K_start_multiline_qw = undef;
10740                 next;
10741             }
10742             my $Kprev  = $self->K_previous_nonblank($Kfirst);
10743             my $Knext  = $self->K_next_nonblank($Kfirst);
10744             my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
10745             my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
10746             if ( $type_m eq 'q' && $type_p ne 'q' ) {
10747                 $rending_multiline_qw_seqno_by_K->{$Kfirst} = $qw_seqno;
10748                 $rKrange_multiline_qw_by_seqno->{$qw_seqno} =
10749                   [ $K_start_multiline_qw, $Kfirst ];
10750                 $K_start_multiline_qw = undef;
10751                 $qw_seqno             = undef;
10752             }
10753         }
10754         if ( !defined($K_start_multiline_qw)
10755             && $rLL->[$Klast]->[_TYPE_] eq 'q' )
10756         {
10757             my $Kprev  = $self->K_previous_nonblank($Klast);
10758             my $Knext  = $self->K_next_nonblank($Klast);
10759             my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
10760             my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
10761             if ( $type_m ne 'q' && $type_p eq 'q' ) {
10762                 $num_qw_seqno++;
10763                 $qw_seqno             = 'q' . $num_qw_seqno;
10764                 $K_start_multiline_qw = $Klast;
10765                 $rstarting_multiline_qw_seqno_by_K->{$Klast} = $qw_seqno;
10766             }
10767         }
10768     }
10769
10770     # Give multiline qw lists extra indentation instead of CI.  This option
10771     # works well but is currently only activated when the -xci flag is set.
10772     # The reason is to avoid unexpected changes in formatting.
10773     if ($rOpts_extended_continuation_indentation) {
10774         while ( my ( $qw_seqno_x, $rKrange ) =
10775             each %{$rKrange_multiline_qw_by_seqno} )
10776         {
10777             my ( $Kbeg, $Kend ) = @{$rKrange};
10778
10779             # require isolated closing token
10780             my $token_end = $rLL->[$Kend]->[_TOKEN_];
10781             next
10782               unless ( length($token_end) == 1
10783                 && ( $is_closing_token{$token_end} || $token_end eq '>' ) );
10784
10785             # require isolated opening token
10786             my $token_beg = $rLL->[$Kbeg]->[_TOKEN_];
10787
10788             # allow space(s) after the qw
10789             if ( length($token_beg) > 3 && substr( $token_beg, 2, 1 ) =~ m/\s/ )
10790             {
10791                 $token_beg =~ s/\s+//;
10792             }
10793
10794             next unless ( length($token_beg) == 3 );
10795
10796             foreach my $KK ( $Kbeg + 1 .. $Kend - 1 ) {
10797                 $rLL->[$KK]->[_LEVEL_]++;
10798                 $rLL->[$KK]->[_CI_LEVEL_] = 0;
10799             }
10800
10801             # set flag for -wn option, which will remove the level
10802             $rmultiline_qw_has_extra_level->{$qw_seqno_x} = 1;
10803         }
10804     }
10805
10806     # For the -lp option we need to mark all parent containers of
10807     # multiline quotes
10808     if ( $rOpts_line_up_parentheses && !$rOpts_extended_line_up_parentheses ) {
10809
10810         while ( my ( $qw_seqno_x, $rKrange ) =
10811             each %{$rKrange_multiline_qw_by_seqno} )
10812         {
10813             my ( $Kbeg, $Kend ) = @{$rKrange};
10814             my $parent_seqno = $self->parent_seqno_by_K($Kend);
10815             next unless ($parent_seqno);
10816
10817             # If the parent container exactly surrounds this qw, then -lp
10818             # formatting seems to work so we will not mark it.
10819             my $is_tightly_contained;
10820             my $Kn      = $self->K_next_nonblank($Kend);
10821             my $seqno_n = defined($Kn) ? $rLL->[$Kn]->[_TYPE_SEQUENCE_] : undef;
10822             if ( defined($seqno_n) && $seqno_n eq $parent_seqno ) {
10823
10824                 my $Kp = $self->K_previous_nonblank($Kbeg);
10825                 my $seqno_p =
10826                   defined($Kp) ? $rLL->[$Kp]->[_TYPE_SEQUENCE_] : undef;
10827                 if ( defined($seqno_p) && $seqno_p eq $parent_seqno ) {
10828                     $is_tightly_contained = 1;
10829                 }
10830             }
10831
10832             $ris_excluded_lp_container->{$parent_seqno} = 1
10833               unless ($is_tightly_contained);
10834
10835             # continue up the tree marking parent containers
10836             while (1) {
10837                 $parent_seqno = $self->[_rparent_of_seqno_]->{$parent_seqno};
10838                 last
10839                   unless ( defined($parent_seqno)
10840                     && $parent_seqno ne SEQ_ROOT );
10841                 $ris_excluded_lp_container->{$parent_seqno} = 1;
10842             }
10843         }
10844     }
10845
10846     $self->[_rstarting_multiline_qw_seqno_by_K_] =
10847       $rstarting_multiline_qw_seqno_by_K;
10848     $self->[_rending_multiline_qw_seqno_by_K_] =
10849       $rending_multiline_qw_seqno_by_K;
10850     $self->[_rKrange_multiline_qw_by_seqno_] = $rKrange_multiline_qw_by_seqno;
10851     $self->[_rmultiline_qw_has_extra_level_] = $rmultiline_qw_has_extra_level;
10852
10853     return;
10854 } ## end sub find_multiline_qw
10855
10856 use constant DEBUG_COLLAPSED_LENGTHS => 0;
10857
10858 # Minimum space reserved for contents of a code block.  A value of 40 has given
10859 # reasonable results.  With a large line length, say -l=120, this will not
10860 # normally be noticeable but it will prevent making a mess in some edge cases.
10861 use constant MIN_BLOCK_LEN => 40;
10862
10863 my %is_handle_type;
10864
10865 BEGIN {
10866     my @q = qw( w C U G i k => );
10867     @is_handle_type{@q} = (1) x scalar(@q);
10868
10869     my $i = 0;
10870     use constant {
10871         _max_prong_len_         => $i++,
10872         _handle_len_            => $i++,
10873         _seqno_o_               => $i++,
10874         _iline_o_               => $i++,
10875         _K_o_                   => $i++,
10876         _K_c_                   => $i++,
10877         _interrupted_list_rule_ => $i++,
10878     };
10879 }
10880
10881 sub collapsed_lengths {
10882
10883     my $self = shift;
10884
10885     #----------------------------------------------------------------
10886     # Define the collapsed lengths of containers for -xlp indentation
10887     #----------------------------------------------------------------
10888
10889     # We need an estimate of the minimum required line length starting at any
10890     # opening container for the -xlp style. This is needed to avoid using too
10891     # much indentation space for lower level containers and thereby running
10892     # out of space for outer container tokens due to the maximum line length
10893     # limit.
10894
10895     # The basic idea is that at each node in the tree we imagine that we have a
10896     # fork with a handle and collapsible prongs:
10897     #
10898     #                            |------------
10899     #                            |--------
10900     #                ------------|-------
10901     #                 handle     |------------
10902     #                            |--------
10903     #                              prongs
10904     #
10905     # Each prong has a minimum collapsed length. The collapsed length at a node
10906     # is the maximum of these minimum lengths, plus the handle length.  Each of
10907     # the prongs may itself be a tree node.
10908
10909     # This is just a rough calculation to get an approximate starting point for
10910     # indentation.  Later routines will be more precise.  It is important that
10911     # these estimates be independent of the line breaks of the input stream in
10912     # order to avoid instabilities.
10913
10914     my $rLL                        = $self->[_rLL_];
10915     my $Klimit                     = $self->[_Klimit_];
10916     my $rlines                     = $self->[_rlines_];
10917     my $K_opening_container        = $self->[_K_opening_container_];
10918     my $K_closing_container        = $self->[_K_closing_container_];
10919     my $rblock_type_of_seqno       = $self->[_rblock_type_of_seqno_];
10920     my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
10921     my $ris_excluded_lp_container  = $self->[_ris_excluded_lp_container_];
10922     my $ris_permanently_broken     = $self->[_ris_permanently_broken_];
10923     my $ris_list_by_seqno          = $self->[_ris_list_by_seqno_];
10924     my $rhas_broken_list           = $self->[_rhas_broken_list_];
10925     my $rtype_count_by_seqno       = $self->[_rtype_count_by_seqno_];
10926
10927     my $K_start_multiline_qw;
10928     my $level_start_multiline_qw = 0;
10929     my $max_prong_len            = 0;
10930     my $handle_len_x             = 0;
10931     my @stack;
10932     my $len                = 0;
10933     my $last_nonblank_type = 'b';
10934     push @stack,
10935       [ $max_prong_len, $handle_len_x, SEQ_ROOT, undef, undef, undef, undef ];
10936
10937     my $iline = -1;
10938     foreach my $line_of_tokens ( @{$rlines} ) {
10939         $iline++;
10940         my $line_type = $line_of_tokens->{_line_type};
10941         next if ( $line_type ne 'CODE' );
10942         my $CODE_type = $line_of_tokens->{_code_type};
10943
10944         # Always skip blank lines
10945         next if ( $CODE_type eq 'BL' );
10946
10947         # Note on other line types:
10948         # 'FS' (Format Skipping) lines may contain opening/closing tokens so
10949         #      we have to process them to keep the stack correctly sequenced.
10950         # 'VB' (Verbatim) lines could be skipped, but testing shows that
10951         #      results look better if we include their lengths.
10952
10953         # Also note that we could exclude -xlp formatting of containers with
10954         # 'FS' and 'VB' lines, but in testing that was not really beneficial.
10955
10956         # So we process tokens in 'FS' and 'VB' lines like all the rest...
10957
10958         my $rK_range = $line_of_tokens->{_rK_range};
10959         my ( $K_first, $K_last ) = @{$rK_range};
10960         next unless ( defined($K_first) && defined($K_last) );
10961
10962         my $has_comment = $rLL->[$K_last]->[_TYPE_] eq '#';
10963
10964         # Always ignore block comments
10965         next if ( $has_comment && $K_first == $K_last );
10966
10967         # Handle an intermediate line of a multiline qw quote. These may
10968         # require including some -ci or -i spaces.  See cases c098/x063.
10969         # Updated to check all lines (not just $K_first==$K_last) to fix b1316
10970         my $K_begin_loop = $K_first;
10971         if ( $rLL->[$K_first]->[_TYPE_] eq 'q' ) {
10972
10973             my $KK       = $K_first;
10974             my $level    = $rLL->[$KK]->[_LEVEL_];
10975             my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
10976
10977             # remember the level of the start
10978             if ( !defined($K_start_multiline_qw) ) {
10979                 $K_start_multiline_qw     = $K_first;
10980                 $level_start_multiline_qw = $level;
10981                 my $seqno_qw =
10982                   $self->[_rstarting_multiline_qw_seqno_by_K_]
10983                   ->{$K_start_multiline_qw};
10984                 if ( !$seqno_qw ) {
10985                     my $Kp = $self->K_previous_nonblank($K_first);
10986                     if ( defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' ) {
10987
10988                         $K_start_multiline_qw = $Kp;
10989                         $level_start_multiline_qw =
10990                           $rLL->[$K_start_multiline_qw]->[_LEVEL_];
10991                     }
10992                     else {
10993
10994                         # Fix for b1319, b1320
10995                         goto NOT_MULTILINE_QW;
10996                     }
10997                 }
10998             }
10999
11000             $len = $rLL->[$KK]->[_CUMULATIVE_LENGTH_] -
11001               $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
11002
11003             # We may have to add the spaces of one level or ci level ...  it
11004             # depends depends on the -xci flag, the -wn flag, and if the qw
11005             # uses a container token as the quote delimiter.
11006
11007             # First rule: add ci if there is a $ci_level
11008             if ($ci_level) {
11009                 $len += $rOpts_continuation_indentation;
11010             }
11011
11012             # Second rule: otherwise, look for an extra indentation level
11013             # from the start and add one indentation level if found.
11014             elsif ( $level > $level_start_multiline_qw ) {
11015                 $len += $rOpts_indent_columns;
11016             }
11017
11018             if ( $len > $max_prong_len ) { $max_prong_len = $len }
11019
11020             $last_nonblank_type = 'q';
11021
11022             $K_begin_loop = $K_first + 1;
11023
11024             # We can skip to the next line if more tokens
11025             next if ( $K_begin_loop > $K_last );
11026
11027         }
11028
11029       NOT_MULTILINE_QW:
11030         $K_start_multiline_qw = undef;
11031
11032         # Find the terminal token, before any side comment
11033         my $K_terminal = $K_last;
11034         if ($has_comment) {
11035             $K_terminal -= 1;
11036             $K_terminal -= 1
11037               if ( $rLL->[$K_terminal]->[_TYPE_] eq 'b'
11038                 && $K_terminal > $K_first );
11039         }
11040
11041         # Use length to terminal comma if interrupted list rule applies
11042         if ( @stack && $stack[-1]->[_interrupted_list_rule_] ) {
11043             my $K_c = $stack[-1]->[_K_c_];
11044             if (
11045                 defined($K_c)
11046                 && $rLL->[$K_terminal]->[_TYPE_] eq ','
11047
11048                 # Ignore if terminal comma, causes instability (b1297, b1330)
11049                 && (
11050                     $K_c - $K_terminal > 2
11051                     || (   $K_c - $K_terminal == 2
11052                         && $rLL->[ $K_terminal + 1 ]->[_TYPE_] ne 'b' )
11053                 )
11054               )
11055             {
11056                 my $Kend = $K_terminal;
11057
11058                 # This caused an instability in b1311 by making the result
11059                 # dependent on input.  It is not really necessary because the
11060                 # comment length is added at the end of the loop.
11061                 ##if ( $has_comment
11062                 ##    && !$rOpts_ignore_side_comment_lengths )
11063                 ##{
11064                 ##    $Kend = $K_last;
11065                 ##}
11066
11067                 # changed from $len to my $leng to fix b1302 b1306 b1317 b1321
11068                 my $leng = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
11069                   $rLL->[ $K_first - 1 ]->[_CUMULATIVE_LENGTH_];
11070
11071                 # Fix for b1331: at a broken => item, include the length of
11072                 # the previous half of the item plus one for the missing space
11073                 if ( $last_nonblank_type eq '=>' ) {
11074                     $leng += $len + 1;
11075                 }
11076
11077                 if ( $leng > $max_prong_len ) { $max_prong_len = $leng }
11078             }
11079         }
11080
11081         # Loop over tokens on this line ...
11082         foreach my $KK ( $K_begin_loop .. $K_terminal ) {
11083
11084             my $type = $rLL->[$KK]->[_TYPE_];
11085             next if ( $type eq 'b' );
11086
11087             #------------------------
11088             # Handle sequenced tokens
11089             #------------------------
11090             my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
11091             if ($seqno) {
11092
11093                 my $token = $rLL->[$KK]->[_TOKEN_];
11094
11095                 #----------------------------
11096                 # Entering a new container...
11097                 #----------------------------
11098                 if ( $is_opening_token{$token}
11099                     && defined( $K_closing_container->{$seqno} ) )
11100                 {
11101
11102                     # save current prong length
11103                     $stack[-1]->[_max_prong_len_] = $max_prong_len;
11104                     $max_prong_len = 0;
11105
11106                     # Start new prong one level deeper
11107                     my $handle_len = 0;
11108                     if ( $rblock_type_of_seqno->{$seqno} ) {
11109
11110                         # code blocks do not use -lp indentation, but behave as
11111                         # if they had a handle of one indentation length
11112                         $handle_len = $rOpts_indent_columns;
11113
11114                     }
11115                     elsif ( $is_handle_type{$last_nonblank_type} ) {
11116                         $handle_len = $len;
11117                         $handle_len += 1
11118                           if ( $KK > 0 && $rLL->[ $KK - 1 ]->[_TYPE_] eq 'b' );
11119                     }
11120
11121                     # Set a flag if the 'Interrupted List Rule' will be applied
11122                     # (see sub copy_old_breakpoints).
11123                     # - Added check on has_broken_list to fix issue b1298
11124
11125                     my $interrupted_list_rule =
11126                          $ris_permanently_broken->{$seqno}
11127                       && $ris_list_by_seqno->{$seqno}
11128                       && !$rhas_broken_list->{$seqno}
11129                       && !$rOpts_ignore_old_breakpoints;
11130
11131                     # NOTES: Since we are looking at old line numbers we have
11132                     # to be very careful not to introduce an instability.
11133
11134                     # This following causes instability (b1288-b1296):
11135                     #   $interrupted_list_rule ||=
11136                     #     $rOpts_break_at_old_comma_breakpoints;
11137
11138                     #  - We could turn off the interrupted list rule if there is
11139                     #    a broken sublist, to follow 'Compound List Rule 1'.
11140                     #  - We could use the _rhas_broken_list_ flag for this.
11141                     #  - But it seems safer not to do this, to avoid
11142                     #    instability, since the broken sublist could be
11143                     #    temporary.  It seems better to let the formatting
11144                     #    stabilize by itself after one or two iterations.
11145                     #  - So, not doing this for now
11146
11147                     # Turn off the interrupted list rule if -vmll is set and a
11148                     # list has '=>' characters.  This avoids instabilities due
11149                     # to dependence on old line breaks; issue b1325.
11150                     if (   $interrupted_list_rule
11151                         && $rOpts_variable_maximum_line_length )
11152                     {
11153                         my $rtype_count = $rtype_count_by_seqno->{$seqno};
11154                         if ( $rtype_count && $rtype_count->{'=>'} ) {
11155                             $interrupted_list_rule = 0;
11156                         }
11157                     }
11158
11159                     # Include length to a comma ending this line
11160                     if (   $interrupted_list_rule
11161                         && $rLL->[$K_terminal]->[_TYPE_] eq ',' )
11162                     {
11163                         my $Kend = $K_terminal;
11164
11165                         # fix for b1332: side comments handled at end of loop
11166                         ##if ( $Kend < $K_last
11167                         ##    && !$rOpts_ignore_side_comment_lengths )
11168                         ##{
11169                         ##    $Kend = $K_last;
11170                         ##}
11171
11172                         # Measure from the next blank if any (fixes b1301)
11173                         my $Kbeg = $KK;
11174                         if (   $rLL->[ $Kbeg + 1 ]->[_TYPE_] eq 'b'
11175                             && $Kbeg < $Kend )
11176                         {
11177                             $Kbeg++;
11178                         }
11179
11180                         my $leng = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
11181                           $rLL->[$Kbeg]->[_CUMULATIVE_LENGTH_];
11182                         if ( $leng > $max_prong_len ) { $max_prong_len = $leng }
11183                     }
11184
11185                     my $K_c = $K_closing_container->{$seqno};
11186
11187                     push @stack,
11188                       [
11189                         $max_prong_len, $handle_len,
11190                         $seqno,         $iline,
11191                         $KK,            $K_c,
11192                         $interrupted_list_rule
11193                       ];
11194                 }
11195
11196                 #--------------------
11197                 # Exiting a container
11198                 #--------------------
11199                 elsif ( $is_closing_token{$token} ) {
11200                     if (@stack) {
11201
11202                         # The current prong ends - get its handle
11203                         my $item          = pop @stack;
11204                         my $handle_len    = $item->[_handle_len_];
11205                         my $seqno_o       = $item->[_seqno_o_];
11206                         my $iline_o       = $item->[_iline_o_];
11207                         my $K_o           = $item->[_K_o_];
11208                         my $K_c_expect    = $item->[_K_c_];
11209                         my $collapsed_len = $max_prong_len;
11210
11211                         if ( $seqno_o ne $seqno ) {
11212
11213                             # This can happen if input file has brace errors.
11214                             # Otherwise it shouldn't happen.  Not fatal but -lp
11215                             # formatting could get messed up.
11216                             if ( DEVEL_MODE && !get_saw_brace_error() ) {
11217                                 Fault(<<EOM);
11218 sequence numbers differ; at CLOSING line $iline, seq=$seqno, Kc=$KK .. at OPENING line $iline_o, seq=$seqno_o, Ko=$K_o, expecting Kc=$K_c_expect
11219 EOM
11220                             }
11221                         }
11222
11223                         #------------------------------------------
11224                         # Rules to avoid scrunching code blocks ...
11225                         #------------------------------------------
11226                         # Some test cases:
11227                         # c098/x107 x108 x110 x112 x114 x115 x117 x118 x119
11228                         my $block_type = $rblock_type_of_seqno->{$seqno};
11229                         if ($block_type) {
11230
11231                             my $K_c          = $KK;
11232                             my $block_length = MIN_BLOCK_LEN;
11233                             my $is_one_line_block;
11234                             my $level = $rLL->[$K_o]->[_LEVEL_];
11235                             if ( defined($K_o) && defined($K_c) ) {
11236
11237                                 # note: fixed 3 May 2022 (removed 'my')
11238                                 $block_length =
11239                                   $rLL->[ $K_c - 1 ]->[_CUMULATIVE_LENGTH_] -
11240                                   $rLL->[$K_o]->[_CUMULATIVE_LENGTH_];
11241                                 $is_one_line_block = $iline == $iline_o;
11242                             }
11243
11244                             # Code block rule 1: Use the total block length if
11245                             # it is less than the minimum.
11246                             if ( $block_length < MIN_BLOCK_LEN ) {
11247                                 $collapsed_len = $block_length;
11248                             }
11249
11250                             # Code block rule 2: Use the full length of a
11251                             # one-line block to avoid breaking it, unless
11252                             # extremely long.  We do not need to do a precise
11253                             # check here, because if it breaks then it will
11254                             # stay broken on later iterations.
11255                             elsif (
11256                                    $is_one_line_block
11257                                 && $block_length <
11258                                 $maximum_line_length_at_level[$level]
11259
11260                                 # But skip this for sort/map/grep/eval blocks
11261                                 # because they can reform (b1345)
11262                                 && !$is_sort_map_grep_eval{$block_type}
11263                               )
11264                             {
11265                                 $collapsed_len = $block_length;
11266                             }
11267
11268                             # Code block rule 3: Otherwise the length should be
11269                             # at least MIN_BLOCK_LEN to avoid scrunching code
11270                             # blocks.
11271                             elsif ( $collapsed_len < MIN_BLOCK_LEN ) {
11272                                 $collapsed_len = MIN_BLOCK_LEN;
11273                             }
11274                         }
11275
11276                         # Store the result.  Some extra space, '2', allows for
11277                         # length of an opening token, inside space, comma, ...
11278                         # This constant has been tuned to give good overall
11279                         # results.
11280                         $collapsed_len += 2;
11281                         $rcollapsed_length_by_seqno->{$seqno} = $collapsed_len;
11282
11283                         # Restart scanning the lower level prong
11284                         if (@stack) {
11285                             $max_prong_len = $stack[-1]->[_max_prong_len_];
11286                             $collapsed_len += $handle_len;
11287                             if ( $collapsed_len > $max_prong_len ) {
11288                                 $max_prong_len = $collapsed_len;
11289                             }
11290                         }
11291                     }
11292                 }
11293
11294                 # it is a ternary - no special processing for these yet
11295                 else {
11296
11297                 }
11298
11299                 $len                = 0;
11300                 $last_nonblank_type = $type;
11301                 next;
11302             }
11303
11304             #----------------------------
11305             # Handle non-container tokens
11306             #----------------------------
11307             my $token_length = $rLL->[$KK]->[_TOKEN_LENGTH_];
11308
11309             # Count lengths of things like 'xx => yy' as a single item
11310             if ( $type eq '=>' ) {
11311                 $len += $token_length + 1;
11312                 if ( $len > $max_prong_len ) { $max_prong_len = $len }
11313             }
11314             elsif ( $last_nonblank_type eq '=>' ) {
11315                 $len += $token_length;
11316                 if ( $len > $max_prong_len ) { $max_prong_len = $len }
11317
11318                 # but only include one => per item
11319                 $len = $token_length;
11320             }
11321
11322             # include everything to end of line after a here target
11323             elsif ( $type eq 'h' ) {
11324                 $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] -
11325                   $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
11326                 if ( $len > $max_prong_len ) { $max_prong_len = $len }
11327             }
11328
11329             # for everything else just use the token length
11330             else {
11331                 $len = $token_length;
11332                 if ( $len > $max_prong_len ) { $max_prong_len = $len }
11333             }
11334             $last_nonblank_type = $type;
11335
11336         } ## end loop over tokens on this line
11337
11338         # Now take care of any side comment
11339         if ($has_comment) {
11340             if ($rOpts_ignore_side_comment_lengths) {
11341                 $len = 0;
11342             }
11343             else {
11344
11345                 # For a side comment when -iscl is not set, measure length from
11346                 # the start of the previous nonblank token
11347                 my $len0 =
11348                     $K_terminal > 0
11349                   ? $rLL->[ $K_terminal - 1 ]->[_CUMULATIVE_LENGTH_]
11350                   : 0;
11351                 $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] - $len0;
11352                 if ( $len > $max_prong_len ) { $max_prong_len = $len }
11353             }
11354         }
11355
11356     } ## end loop over lines
11357
11358     if (DEBUG_COLLAPSED_LENGTHS) {
11359         print "\nCollapsed lengths--\n";
11360         foreach
11361           my $key ( sort { $a <=> $b } keys %{$rcollapsed_length_by_seqno} )
11362         {
11363             my $clen = $rcollapsed_length_by_seqno->{$key};
11364             print "$key -> $clen\n";
11365         }
11366     }
11367
11368     return;
11369 } ## end sub collapsed_lengths
11370
11371 sub is_excluded_lp {
11372
11373     # Decide if this container is excluded by user request:
11374     #  returns true if this token is excluded (i.e., may not use -lp)
11375     #  returns false otherwise
11376
11377     # The control hash can either describe:
11378     #   what to exclude:  $line_up_parentheses_control_is_lxpl = 1, or
11379     #   what to include:  $line_up_parentheses_control_is_lxpl = 0
11380
11381     my ( $self, $KK ) = @_;
11382     my $rLL         = $self->[_rLL_];
11383     my $rtoken_vars = $rLL->[$KK];
11384     my $token       = $rtoken_vars->[_TOKEN_];
11385     my $rflags      = $line_up_parentheses_control_hash{$token};
11386
11387     #-----------------------------------------------
11388     # TEST #1: check match to listed container types
11389     #-----------------------------------------------
11390     if ( !defined($rflags) ) {
11391
11392         # There is no entry for this container, so we are done
11393         return !$line_up_parentheses_control_is_lxpl;
11394     }
11395
11396     my ( $flag1, $flag2 ) = @{$rflags};
11397
11398     #-----------------------------------------------------------
11399     # TEST #2: check match to flag1, the preceding nonblank word
11400     #-----------------------------------------------------------
11401     my $match_flag1 = !defined($flag1) || $flag1 eq '*';
11402     if ( !$match_flag1 ) {
11403
11404         # Find the previous token
11405         my ( $is_f, $is_k, $is_w );
11406         my $Kp = $self->K_previous_nonblank($KK);
11407         if ( defined($Kp) ) {
11408             my $type_p = $rLL->[$Kp]->[_TYPE_];
11409             my $seqno  = $rtoken_vars->[_TYPE_SEQUENCE_];
11410
11411             # keyword?
11412             $is_k = $type_p eq 'k';
11413
11414             # function call?
11415             $is_f = $self->[_ris_function_call_paren_]->{$seqno};
11416
11417             # either keyword or function call?
11418             $is_w = $is_k || $is_f;
11419         }
11420
11421         # Check for match based on flag1 and the previous token:
11422         if    ( $flag1 eq 'k' ) { $match_flag1 = $is_k }
11423         elsif ( $flag1 eq 'K' ) { $match_flag1 = !$is_k }
11424         elsif ( $flag1 eq 'f' ) { $match_flag1 = $is_f }
11425         elsif ( $flag1 eq 'F' ) { $match_flag1 = !$is_f }
11426         elsif ( $flag1 eq 'w' ) { $match_flag1 = $is_w }
11427         elsif ( $flag1 eq 'W' ) { $match_flag1 = !$is_w }
11428     }
11429
11430     # See if we can exclude this based on the flag1 test...
11431     if ($line_up_parentheses_control_is_lxpl) {
11432         return 1 if ($match_flag1);
11433     }
11434     else {
11435         return 1 if ( !$match_flag1 );
11436     }
11437
11438     #-------------------------------------------------------------
11439     # TEST #3: exclusion based on flag2 and the container contents
11440     #-------------------------------------------------------------
11441
11442     # Note that this is an exclusion test for both -lpxl or -lpil input methods
11443     # The options are:
11444     #  0 or blank: ignore container contents
11445     #  1 exclude non-lists or lists with sublists
11446     #  2 same as 1 but also exclude lists with code blocks
11447
11448     my $match_flag2;
11449     if ($flag2) {
11450
11451         my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
11452
11453         my $is_list        = $self->[_ris_list_by_seqno_]->{$seqno};
11454         my $has_list       = $self->[_rhas_list_]->{$seqno};
11455         my $has_code_block = $self->[_rhas_code_block_]->{$seqno};
11456         my $has_ternary    = $self->[_rhas_ternary_]->{$seqno};
11457
11458         if (  !$is_list
11459             || $has_list
11460             || $flag2 eq '2' && ( $has_code_block || $has_ternary ) )
11461         {
11462             $match_flag2 = 1;
11463         }
11464     }
11465     return $match_flag2;
11466 } ## end sub is_excluded_lp
11467
11468 sub set_excluded_lp_containers {
11469
11470     my ($self) = @_;
11471     return unless ($rOpts_line_up_parentheses);
11472     my $rLL = $self->[_rLL_];
11473     return unless ( defined($rLL) && @{$rLL} );
11474
11475     my $K_opening_container       = $self->[_K_opening_container_];
11476     my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
11477     my $rblock_type_of_seqno      = $self->[_rblock_type_of_seqno_];
11478
11479     foreach my $seqno ( keys %{$K_opening_container} ) {
11480
11481         # code blocks are always excluded by the -lp coding so we can skip them
11482         next if ( $rblock_type_of_seqno->{$seqno} );
11483
11484         my $KK = $K_opening_container->{$seqno};
11485         next unless defined($KK);
11486
11487         # see if a user exclusion rule turns off -lp for this container
11488         if ( $self->is_excluded_lp($KK) ) {
11489             $ris_excluded_lp_container->{$seqno} = 1;
11490         }
11491     }
11492     return;
11493 } ## end sub set_excluded_lp_containers
11494
11495 ######################################
11496 # CODE SECTION 6: Process line-by-line
11497 ######################################
11498
11499 sub process_all_lines {
11500
11501     #----------------------------------------------------------
11502     # Main loop to format all lines of a file according to type
11503     #----------------------------------------------------------
11504
11505     my $self                       = shift;
11506     my $rlines                     = $self->[_rlines_];
11507     my $sink_object                = $self->[_sink_object_];
11508     my $fh_tee                     = $self->[_fh_tee_];
11509     my $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
11510     my $file_writer_object         = $self->[_file_writer_object_];
11511     my $logger_object              = $self->[_logger_object_];
11512     my $vertical_aligner_object    = $self->[_vertical_aligner_object_];
11513     my $save_logfile               = $self->[_save_logfile_];
11514
11515     # Note for RT#118553, leave only one newline at the end of a file.
11516     # Example code to do this is in comments below:
11517     # my $Opt_trim_ending_blank_lines = 0;
11518     # if ($Opt_trim_ending_blank_lines) {
11519     #     while ( my $line_of_tokens = pop @{$rlines} ) {
11520     #         my $line_type = $line_of_tokens->{_line_type};
11521     #         if ( $line_type eq 'CODE' ) {
11522     #             my $CODE_type = $line_of_tokens->{_code_type};
11523     #             next if ( $CODE_type eq 'BL' );
11524     #         }
11525     #         push @{$rlines}, $line_of_tokens;
11526     #         last;
11527     #     }
11528     # }
11529
11530    # But while this would be a trivial update, it would have very undesirable
11531    # side effects when perltidy is run from within an editor on a small snippet.
11532    # So this is best done with a separate filter, such
11533    # as 'delete_ending_blank_lines.pl' in the examples folder.
11534
11535     # Flag to prevent blank lines when POD occurs in a format skipping sect.
11536     my $in_format_skipping_section;
11537
11538     # set locations for blanks around long runs of keywords
11539     my $rwant_blank_line_after = $self->keyword_group_scan();
11540
11541     my $line_type      = EMPTY_STRING;
11542     my $i_last_POD_END = -10;
11543     my $i              = -1;
11544     foreach my $line_of_tokens ( @{$rlines} ) {
11545         $i++;
11546
11547         # insert blank lines requested for keyword sequences
11548         if (   $i > 0
11549             && defined( $rwant_blank_line_after->{ $i - 1 } )
11550             && $rwant_blank_line_after->{ $i - 1 } == 1 )
11551         {
11552             $self->want_blank_line();
11553         }
11554
11555         my $last_line_type = $line_type;
11556         $line_type = $line_of_tokens->{_line_type};
11557         my $input_line = $line_of_tokens->{_line_text};
11558
11559         # _line_type codes are:
11560         #   SYSTEM         - system-specific code before hash-bang line
11561         #   CODE           - line of perl code (including comments)
11562         #   POD_START      - line starting pod, such as '=head'
11563         #   POD            - pod documentation text
11564         #   POD_END        - last line of pod section, '=cut'
11565         #   HERE           - text of here-document
11566         #   HERE_END       - last line of here-doc (target word)
11567         #   FORMAT         - format section
11568         #   FORMAT_END     - last line of format section, '.'
11569         #   SKIP           - code skipping section
11570         #   SKIP_END       - last line of code skipping section, '#>>V'
11571         #   DATA_START     - __DATA__ line
11572         #   DATA           - unidentified text following __DATA__
11573         #   END_START      - __END__ line
11574         #   END            - unidentified text following __END__
11575         #   ERROR          - we are in big trouble, probably not a perl script
11576
11577         # put a blank line after an =cut which comes before __END__ and __DATA__
11578         # (required by podchecker)
11579         if ( $last_line_type eq 'POD_END' && !$self->[_saw_END_or_DATA_] ) {
11580             $i_last_POD_END = $i;
11581             $file_writer_object->reset_consecutive_blank_lines();
11582             if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
11583                 $self->want_blank_line();
11584             }
11585         }
11586
11587         # handle line of code..
11588         if ( $line_type eq 'CODE' ) {
11589
11590             my $CODE_type = $line_of_tokens->{_code_type};
11591             $in_format_skipping_section = $CODE_type eq 'FS';
11592
11593             # Handle blank lines
11594             if ( $CODE_type eq 'BL' ) {
11595
11596                 # Keep this blank? Start with the flag -kbl=n, where
11597                 #   n=0 ignore all old blank lines
11598                 #   n=1 stable: keep old blanks, but limited by -mbl=n
11599                 #   n=2 keep all old blank lines, regardless of -mbl=n
11600                 # If n=0 we delete all old blank lines and let blank line
11601                 # rules generate any needed blank lines.
11602                 my $kgb_keep = $rOpts_keep_old_blank_lines;
11603
11604                 # Then delete lines requested by the keyword-group logic if
11605                 # allowed
11606                 if (   $kgb_keep == 1
11607                     && defined( $rwant_blank_line_after->{$i} )
11608                     && $rwant_blank_line_after->{$i} == 2 )
11609                 {
11610                     $kgb_keep = 0;
11611                 }
11612
11613                 # But always keep a blank line following an =cut
11614                 if ( $i - $i_last_POD_END < 3 && !$kgb_keep ) {
11615                     $kgb_keep = 1;
11616                 }
11617
11618                 if ($kgb_keep) {
11619                     $self->flush($CODE_type);
11620                     $file_writer_object->write_blank_code_line(
11621                         $rOpts_keep_old_blank_lines == 2 );
11622                     $self->[_last_line_leading_type_] = 'b';
11623                 }
11624                 next;
11625             }
11626             else {
11627
11628                 # Let logger see all non-blank lines of code. This is a slow
11629                 # operation so we avoid it if it is not going to be saved.
11630                 if ( $save_logfile && $logger_object ) {
11631                     $logger_object->black_box( $line_of_tokens,
11632                         $vertical_aligner_object->get_output_line_number );
11633                 }
11634             }
11635
11636             # Handle Format Skipping (FS) and Verbatim (VB) Lines
11637             if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
11638                 $self->write_unindented_line("$input_line");
11639                 $file_writer_object->reset_consecutive_blank_lines();
11640                 next;
11641             }
11642
11643             # Handle all other lines of code
11644             $self->process_line_of_CODE($line_of_tokens);
11645         }
11646
11647         # handle line of non-code..
11648         else {
11649
11650             # set special flags
11651             my $skip_line = 0;
11652             if ( substr( $line_type, 0, 3 ) eq 'POD' ) {
11653
11654                 # Pod docs should have a preceding blank line.  But stay
11655                 # out of __END__ and __DATA__ sections, because
11656                 # the user may be using this section for any purpose whatsoever
11657                 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
11658                 if ( $rOpts->{'trim-pod'} )   { $input_line =~ s/\s+$// }
11659                 if (   !$skip_line
11660                     && !$in_format_skipping_section
11661                     && $line_type eq 'POD_START'
11662                     && !$self->[_saw_END_or_DATA_] )
11663                 {
11664                     $self->want_blank_line();
11665                 }
11666             }
11667
11668             # leave the blank counters in a predictable state
11669             # after __END__ or __DATA__
11670             elsif ( $line_type eq 'END_START' || $line_type eq 'DATA_START' ) {
11671                 $file_writer_object->reset_consecutive_blank_lines();
11672                 $self->[_saw_END_or_DATA_] = 1;
11673             }
11674
11675             # Patch to avoid losing blank lines after a code-skipping block;
11676             # fixes case c047.
11677             elsif ( $line_type eq 'SKIP_END' ) {
11678                 $file_writer_object->reset_consecutive_blank_lines();
11679             }
11680
11681             # write unindented non-code line
11682             if ( !$skip_line ) {
11683                 $self->write_unindented_line($input_line);
11684             }
11685         }
11686     }
11687     return;
11688
11689 } ## end sub process_all_lines
11690
11691 sub keyword_group_scan {
11692     my $self = shift;
11693
11694     #-------------------------------------------------------------------------
11695     # Called once per file to process any --keyword-group-blanks-* parameters.
11696     #-------------------------------------------------------------------------
11697
11698     # Manipulate blank lines around keyword groups (kgb* flags)
11699     # Scan all lines looking for runs of consecutive lines beginning with
11700     # selected keywords.  Example keywords are 'my', 'our', 'local', ... but
11701     # they may be anything.  We will set flags requesting that blanks be
11702     # inserted around and within them according to input parameters.  Note
11703     # that we are scanning the lines as they came in in the input stream, so
11704     # they are not necessarily well formatted.
11705
11706     # The output of this sub is a return hash ref whose keys are the indexes of
11707     # lines after which we desire a blank line.  For line index i:
11708     #     $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i
11709     #     $rhash_of_desires->{$i} = 2 means we want blank line $i removed
11710     my $rhash_of_desires = {};
11711
11712     # Nothing to do if no blanks can be output. This test added to fix
11713     # case b760.
11714     if ( !$rOpts_maximum_consecutive_blank_lines ) {
11715         return $rhash_of_desires;
11716     }
11717
11718     my $Opt_blanks_before = $rOpts->{'keyword-group-blanks-before'};   # '-kgbb'
11719     my $Opt_blanks_after  = $rOpts->{'keyword-group-blanks-after'};    # '-kgba'
11720     my $Opt_blanks_inside = $rOpts->{'keyword-group-blanks-inside'};   # '-kgbi'
11721     my $Opt_blanks_delete = $rOpts->{'keyword-group-blanks-delete'};   # '-kgbd'
11722     my $Opt_size          = $rOpts->{'keyword-group-blanks-size'};     # '-kgbs'
11723
11724     # A range of sizes can be input with decimal notation like 'min.max' with
11725     # any number of dots between the two numbers. Examples:
11726     #    string    =>    min    max  matches
11727     #    1.1             1      1    exactly 1
11728     #    1.3             1      3    1,2, or 3
11729     #    1..3            1      3    1,2, or 3
11730     #    5               5      -    5 or more
11731     #    6.              6      -    6 or more
11732     #    .2              -      2    up to 2
11733     #    1.0             1      0    nothing
11734     my ( $Opt_size_min, $Opt_size_max ) = split /\.+/, $Opt_size;
11735     if (   $Opt_size_min && $Opt_size_min !~ /^\d+$/
11736         || $Opt_size_max && $Opt_size_max !~ /^\d+$/ )
11737     {
11738         Warn(<<EOM);
11739 Unexpected value for -kgbs: '$Opt_size'; expecting 'min' or 'min.max';
11740 ignoring all -kgb flags
11741 EOM
11742
11743         # Turn this option off so that this message does not keep repeating
11744         # during iterations and other files.
11745         $rOpts->{'keyword-group-blanks-size'} = EMPTY_STRING;
11746         return $rhash_of_desires;
11747     }
11748     $Opt_size_min = 1 unless ($Opt_size_min);
11749
11750     if ( $Opt_size_max && $Opt_size_max < $Opt_size_min ) {
11751         return $rhash_of_desires;
11752     }
11753
11754     # codes for $Opt_blanks_before and $Opt_blanks_after:
11755     # 0 = never (delete if exist)
11756     # 1 = stable (keep unchanged)
11757     # 2 = always (insert if missing)
11758
11759     return $rhash_of_desires
11760       unless $Opt_size_min > 0
11761       && ( $Opt_blanks_before != 1
11762         || $Opt_blanks_after != 1
11763         || $Opt_blanks_inside
11764         || $Opt_blanks_delete );
11765
11766     my $Opt_pattern         = $keyword_group_list_pattern;
11767     my $Opt_comment_pattern = $keyword_group_list_comment_pattern;
11768     my $Opt_repeat_count =
11769       $rOpts->{'keyword-group-blanks-repeat-count'};    # '-kgbr'
11770
11771     my $rlines              = $self->[_rlines_];
11772     my $rLL                 = $self->[_rLL_];
11773     my $K_closing_container = $self->[_K_closing_container_];
11774     my $K_opening_container = $self->[_K_opening_container_];
11775     my $rK_weld_right       = $self->[_rK_weld_right_];
11776
11777     # variables for the current group and subgroups:
11778     my ( $ibeg, $iend, $count, $level_beg, $K_closing, @iblanks, @group,
11779         @subgroup );
11780
11781     # Definitions:
11782     # ($ibeg, $iend) = starting and ending line indexes of this entire group
11783     #         $count = total number of keywords seen in this entire group
11784     #     $level_beg = indentation level of this group
11785     #         @group = [ $i, $token, $count ] =list of all keywords & blanks
11786     #      @subgroup =  $j, index of group where token changes
11787     #       @iblanks = line indexes of blank lines in input stream in this group
11788     #  where i=starting line index
11789     #        token (the keyword)
11790     #        count = number of this token in this subgroup
11791     #            j = index in group where token changes
11792     #
11793     # These vars will contain values for the most recently seen line:
11794     my ( $line_type, $CODE_type, $K_first, $K_last );
11795
11796     my $number_of_groups_seen = 0;
11797
11798     #-------------------
11799     # helper subroutines
11800     #-------------------
11801
11802     my $insert_blank_after = sub {
11803         my ($i) = @_;
11804         $rhash_of_desires->{$i} = 1;
11805         my $ip = $i + 1;
11806         if ( defined( $rhash_of_desires->{$ip} )
11807             && $rhash_of_desires->{$ip} == 2 )
11808         {
11809             $rhash_of_desires->{$ip} = 0;
11810         }
11811         return;
11812     };
11813
11814     my $split_into_sub_groups = sub {
11815
11816         # place blanks around long sub-groups of keywords
11817         # ...if requested
11818         return unless ($Opt_blanks_inside);
11819
11820         # loop over sub-groups, index k
11821         push @subgroup, scalar @group;
11822         my $kbeg = 1;
11823         my $kend = @subgroup - 1;
11824         foreach my $k ( $kbeg .. $kend ) {
11825
11826             # index j runs through all keywords found
11827             my $j_b = $subgroup[ $k - 1 ];
11828             my $j_e = $subgroup[$k] - 1;
11829
11830             # index i is the actual line number of a keyword
11831             my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] };
11832             my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] };
11833             my $num = $count_e - $count_b + 1;
11834
11835             # This subgroup runs from line $ib to line $ie-1, but may contain
11836             # blank lines
11837             if ( $num >= $Opt_size_min ) {
11838
11839                 # if there are blank lines, we require that at least $num lines
11840                 # be non-blank up to the boundary with the next subgroup.
11841                 my $nog_b = my $nog_e = 1;
11842                 if ( @iblanks && !$Opt_blanks_delete ) {
11843                     my $j_bb = $j_b + $num - 1;
11844                     my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] };
11845                     $nog_b = $count_bb - $count_b + 1 == $num;
11846
11847                     my $j_ee = $j_e - ( $num - 1 );
11848                     my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] };
11849                     $nog_e = $count_e - $count_ee + 1 == $num;
11850                 }
11851                 if ( $nog_b && $k > $kbeg ) {
11852                     $insert_blank_after->( $i_b - 1 );
11853                 }
11854                 if ( $nog_e && $k < $kend ) {
11855                     my ( $i_ep, $tok_ep, $count_ep ) = @{ $group[ $j_e + 1 ] };
11856                     $insert_blank_after->( $i_ep - 1 );
11857                 }
11858             }
11859         }
11860         return;
11861     };
11862
11863     my $delete_if_blank = sub {
11864         my ($i) = @_;
11865
11866         # delete line $i if it is blank
11867         return unless ( $i >= 0 && $i < @{$rlines} );
11868         return if ( $rlines->[$i]->{_line_type} ne 'CODE' );
11869         my $code_type = $rlines->[$i]->{_code_type};
11870         if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; }
11871         return;
11872     };
11873
11874     my $delete_inner_blank_lines = sub {
11875
11876         # always remove unwanted trailing blank lines from our list
11877         return unless (@iblanks);
11878         while ( my $ibl = pop(@iblanks) ) {
11879             if ( $ibl < $iend ) { push @iblanks, $ibl; last }
11880             $iend = $ibl;
11881         }
11882
11883         # now mark mark interior blank lines for deletion if requested
11884         return unless ($Opt_blanks_delete);
11885
11886         while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 }
11887
11888         return;
11889     };
11890
11891     my $end_group = sub {
11892
11893         # end a group of keywords
11894         my ($bad_ending) = @_;
11895         if ( defined($ibeg) && $ibeg >= 0 ) {
11896
11897             # then handle sufficiently large groups
11898             if ( $count >= $Opt_size_min ) {
11899
11900                 $number_of_groups_seen++;
11901
11902                 # do any blank deletions regardless of the count
11903                 $delete_inner_blank_lines->();
11904
11905                 if ( $ibeg > 0 ) {
11906                     my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type};
11907
11908                     # patch for hash bang line which is not currently marked as
11909                     # a comment; mark it as a comment
11910                     if ( $ibeg == 1 && !$code_type ) {
11911                         my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text};
11912                         $code_type = 'BC'
11913                           if ( $line_text && $line_text =~ /^#/ );
11914                     }
11915
11916                     # Do not insert a blank after a comment
11917                     # (this could be subject to a flag in the future)
11918                     if ( $code_type !~ /(BC|SBC|SBCX)/ ) {
11919                         if ( $Opt_blanks_before == INSERT ) {
11920                             $insert_blank_after->( $ibeg - 1 );
11921
11922                         }
11923                         elsif ( $Opt_blanks_before == DELETE ) {
11924                             $delete_if_blank->( $ibeg - 1 );
11925                         }
11926                     }
11927                 }
11928
11929                 # We will only put blanks before code lines. We could loosen
11930                 # this rule a little, but we have to be very careful because
11931                 # for example we certainly don't want to drop a blank line
11932                 # after a line like this:
11933                 #   my $var = <<EOM;
11934                 if ( $line_type eq 'CODE' && defined($K_first) ) {
11935
11936                     # - Do not put a blank before a line of different level
11937                     # - Do not put a blank line if we ended the search badly
11938                     # - Do not put a blank at the end of the file
11939                     # - Do not put a blank line before a hanging side comment
11940                     my $level    = $rLL->[$K_first]->[_LEVEL_];
11941                     my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
11942
11943                     if (   $level == $level_beg
11944                         && $ci_level == 0
11945                         && !$bad_ending
11946                         && $iend < @{$rlines}
11947                         && $CODE_type ne 'HSC' )
11948                     {
11949                         if ( $Opt_blanks_after == INSERT ) {
11950                             $insert_blank_after->($iend);
11951                         }
11952                         elsif ( $Opt_blanks_after == DELETE ) {
11953                             $delete_if_blank->( $iend + 1 );
11954                         }
11955                     }
11956                 }
11957             }
11958             $split_into_sub_groups->();
11959         }
11960
11961         # reset for another group
11962         $ibeg      = -1;
11963         $iend      = undef;
11964         $level_beg = -1;
11965         $K_closing = undef;
11966         @group     = ();
11967         @subgroup  = ();
11968         @iblanks   = ();
11969
11970         return;
11971     };
11972
11973     my $find_container_end = sub {
11974
11975         # If the keyword line is continued onto subsequent lines, find the
11976         # closing token '$K_closing' so that we can easily skip past the
11977         # contents of the container.
11978
11979         # We only set this value if we find a simple list, meaning
11980         # -contents only one level deep
11981         # -not welded
11982
11983         # First check: skip if next line is not one deeper
11984         my $Knext_nonblank = $self->K_next_nonblank($K_last);
11985         goto RETURN if ( !defined($Knext_nonblank) );
11986         my $level_next = $rLL->[$Knext_nonblank]->[_LEVEL_];
11987         goto RETURN if ( $level_next != $level_beg + 1 );
11988
11989         # Find the parent container of the first token on the next line
11990         my $parent_seqno = $self->parent_seqno_by_K($Knext_nonblank);
11991         goto RETURN unless ( defined($parent_seqno) );
11992
11993         # Must not be a weld (can be unstable)
11994         goto RETURN
11995           if ( $total_weld_count && $self->is_welded_at_seqno($parent_seqno) );
11996
11997         # Opening container must exist and be on this line
11998         my $Ko = $K_opening_container->{$parent_seqno};
11999         goto RETURN unless ( defined($Ko) && $Ko > $K_first && $Ko <= $K_last );
12000
12001         # Verify that the closing container exists and is on a later line
12002         my $Kc = $K_closing_container->{$parent_seqno};
12003         goto RETURN unless ( defined($Kc) && $Kc > $K_last );
12004
12005         # That's it
12006         $K_closing = $Kc;
12007         goto RETURN;
12008
12009       RETURN:
12010         return;
12011     };
12012
12013     my $add_to_group = sub {
12014         my ( $i, $token, $level ) = @_;
12015
12016         # End the previous group if we have reached the maximum
12017         # group size
12018         if ( $Opt_size_max && @group >= $Opt_size_max ) {
12019             $end_group->();
12020         }
12021
12022         if ( @group == 0 ) {
12023             $ibeg      = $i;
12024             $level_beg = $level;
12025             $count     = 0;
12026         }
12027
12028         $count++;
12029         $iend = $i;
12030
12031         # New sub-group?
12032         if ( !@group || $token ne $group[-1]->[1] ) {
12033             push @subgroup, scalar(@group);
12034         }
12035         push @group, [ $i, $token, $count ];
12036
12037         # remember if this line ends in an open container
12038         $find_container_end->();
12039
12040         return;
12041     };
12042
12043     #----------------------------------
12044     # loop over all lines of the source
12045     #----------------------------------
12046     $end_group->();
12047     my $i = -1;
12048     foreach my $line_of_tokens ( @{$rlines} ) {
12049
12050         $i++;
12051         last
12052           if ( $Opt_repeat_count > 0
12053             && $number_of_groups_seen >= $Opt_repeat_count );
12054
12055         $CODE_type = EMPTY_STRING;
12056         $K_first   = undef;
12057         $K_last    = undef;
12058         $line_type = $line_of_tokens->{_line_type};
12059
12060         # always end a group at non-CODE
12061         if ( $line_type ne 'CODE' ) { $end_group->(); next }
12062
12063         $CODE_type = $line_of_tokens->{_code_type};
12064
12065         # end any group at a format skipping line
12066         if ( $CODE_type && $CODE_type eq 'FS' ) {
12067             $end_group->();
12068             next;
12069         }
12070
12071         # continue in a verbatim (VB) type; it may be quoted text
12072         if ( $CODE_type eq 'VB' ) {
12073             if ( $ibeg >= 0 ) { $iend = $i; }
12074             next;
12075         }
12076
12077         # and continue in blank (BL) types
12078         if ( $CODE_type eq 'BL' ) {
12079             if ( $ibeg >= 0 ) {
12080                 $iend = $i;
12081                 push @{iblanks}, $i;
12082
12083                 # propagate current subgroup token
12084                 my $tok = $group[-1]->[1];
12085                 push @group, [ $i, $tok, $count ];
12086             }
12087             next;
12088         }
12089
12090         # examine the first token of this line
12091         my $rK_range = $line_of_tokens->{_rK_range};
12092         ( $K_first, $K_last ) = @{$rK_range};
12093         if ( !defined($K_first) ) {
12094
12095             # Somewhat unexpected blank line..
12096             # $rK_range is normally defined for line type CODE, but this can
12097             # happen for example if the input line was a single semicolon which
12098             # is being deleted.  In that case there was code in the input
12099             # file but it is not being retained. So we can silently return.
12100             return $rhash_of_desires;
12101         }
12102
12103         my $level    = $rLL->[$K_first]->[_LEVEL_];
12104         my $type     = $rLL->[$K_first]->[_TYPE_];
12105         my $token    = $rLL->[$K_first]->[_TOKEN_];
12106         my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
12107
12108         # End a group 'badly' at an unexpected level.  This will prevent
12109         # blank lines being incorrectly placed after the end of the group.
12110         # We are looking for any deviation from two acceptable patterns:
12111         #   PATTERN 1: a simple list; secondary lines are at level+1
12112         #   PATTERN 2: a long statement; all secondary lines same level
12113         # This was added as a fix for case b1177, in which a complex structure
12114         # got incorrectly inserted blank lines.
12115         if ( $ibeg >= 0 ) {
12116
12117             # Check for deviation from PATTERN 1, simple list:
12118             if ( defined($K_closing) && $K_first < $K_closing ) {
12119                 $end_group->(1) if ( $level != $level_beg + 1 );
12120             }
12121
12122             # Check for deviation from PATTERN 2, single statement:
12123             elsif ( $level != $level_beg ) { $end_group->(1) }
12124         }
12125
12126         # Do not look for keywords in lists ( keyword 'my' can occur in lists,
12127         # see case b760); fixed for c048.
12128         if ( $self->is_list_by_K($K_first) ) {
12129             if ( $ibeg >= 0 ) { $iend = $i }
12130             next;
12131         }
12132
12133         # see if this is a code type we seek (i.e. comment)
12134         if (   $CODE_type
12135             && $Opt_comment_pattern
12136             && $CODE_type =~ /$Opt_comment_pattern/ )
12137         {
12138
12139             my $tok = $CODE_type;
12140
12141             # Continuing a group
12142             if ( $ibeg >= 0 && $level == $level_beg ) {
12143                 $add_to_group->( $i, $tok, $level );
12144             }
12145
12146             # Start new group
12147             else {
12148
12149                 # first end old group if any; we might be starting new
12150                 # keywords at different level
12151                 if ( $ibeg >= 0 ) { $end_group->(); }
12152                 $add_to_group->( $i, $tok, $level );
12153             }
12154             next;
12155         }
12156
12157         # See if it is a keyword we seek, but never start a group in a
12158         # continuation line; the code may be badly formatted.
12159         if (   $ci_level == 0
12160             && $type eq 'k'
12161             && $token =~ /$Opt_pattern/ )
12162         {
12163
12164             # Continuing a keyword group
12165             if ( $ibeg >= 0 && $level == $level_beg ) {
12166                 $add_to_group->( $i, $token, $level );
12167             }
12168
12169             # Start new keyword group
12170             else {
12171
12172                 # first end old group if any; we might be starting new
12173                 # keywords at different level
12174                 if ( $ibeg >= 0 ) { $end_group->(); }
12175                 $add_to_group->( $i, $token, $level );
12176             }
12177             next;
12178         }
12179
12180         # This is not one of our keywords, but we are in a keyword group
12181         # so see if we should continue or quit
12182         elsif ( $ibeg >= 0 ) {
12183
12184             # - bail out on a large level change; we may have walked into a
12185             #   data structure or anonymous sub code.
12186             if ( $level > $level_beg + 1 || $level < $level_beg ) {
12187                 $end_group->(1);
12188                 next;
12189             }
12190
12191             # - keep going on a continuation line of the same level, since
12192             #   it is probably a continuation of our previous keyword,
12193             # - and keep going past hanging side comments because we never
12194             #   want to interrupt them.
12195             if ( ( ( $level == $level_beg ) && $ci_level > 0 )
12196                 || $CODE_type eq 'HSC' )
12197             {
12198                 $iend = $i;
12199                 next;
12200             }
12201
12202             # - continue if if we are within in a container which started with
12203             # the line of the previous keyword.
12204             if ( defined($K_closing) && $K_first <= $K_closing ) {
12205
12206                 # continue if entire line is within container
12207                 if ( $K_last <= $K_closing ) { $iend = $i; next }
12208
12209                 # continue at ); or }; or ];
12210                 my $KK = $K_closing + 1;
12211                 if ( $rLL->[$KK]->[_TYPE_] eq ';' ) {
12212                     if ( $KK < $K_last ) {
12213                         if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK }
12214                         if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' ) {
12215                             $end_group->(1);
12216                             next;
12217                         }
12218                     }
12219                     $iend = $i;
12220                     next;
12221                 }
12222
12223                 $end_group->(1);
12224                 next;
12225             }
12226
12227             # - end the group if none of the above
12228             $end_group->();
12229             next;
12230         }
12231
12232         # not in a keyword group; continue
12233         else { next }
12234     }
12235
12236     # end of loop over all lines
12237     $end_group->();
12238     return $rhash_of_desires;
12239
12240 } ## end sub keyword_group_scan
12241
12242 #######################################
12243 # CODE SECTION 7: Process lines of code
12244 #######################################
12245
12246 {    ## begin closure process_line_of_CODE
12247
12248     # The routines in this closure receive lines of code and combine them into
12249     # 'batches' and send them along. A 'batch' is the unit of code which can be
12250     # processed further as a unit. It has the property that it is the largest
12251     # amount of code into which which perltidy is free to place one or more
12252     # line breaks within it without violating any constraints.
12253
12254     # When a new batch is formed it is sent to sub 'grind_batch_of_code'.
12255
12256     # flags needed by the store routine
12257     my $line_of_tokens;
12258     my $no_internal_newlines;
12259     my $CODE_type;
12260
12261     # range of K of tokens for the current line
12262     my ( $K_first, $K_last );
12263
12264     my ( $rLL, $radjusted_levels, $rparent_of_seqno, $rdepth_of_opening_seqno,
12265         $rblock_type_of_seqno, $ri_starting_one_line_block );
12266
12267     # past stored nonblank tokens and flags
12268     my (
12269         $K_last_nonblank_code,       $looking_for_else,
12270         $is_static_block_comment,    $last_CODE_type,
12271         $last_line_had_side_comment, $next_parent_seqno,
12272         $next_slevel,
12273     );
12274
12275     # Called once at the start of a new file
12276     sub initialize_process_line_of_CODE {
12277         $K_last_nonblank_code       = undef;
12278         $looking_for_else           = 0;
12279         $is_static_block_comment    = 0;
12280         $last_line_had_side_comment = 0;
12281         $next_parent_seqno          = SEQ_ROOT;
12282         $next_slevel                = undef;
12283         return;
12284     }
12285
12286     # Batch variables: these describe the current batch of code being formed
12287     # and sent down the pipeline.  They are initialized in the next
12288     # sub.
12289     my ( $rbrace_follower, $index_start_one_line_block,
12290         $semicolons_before_block_self_destruct,
12291         $starting_in_quote, $ending_in_quote, );
12292
12293     # Called before the start of each new batch
12294     sub initialize_batch_variables {
12295
12296         $max_index_to_go         = UNDEFINED_INDEX;
12297         $summed_lengths_to_go[0] = 0;
12298         $nesting_depth_to_go[0]  = 0;
12299         ##@summed_lengths_to_go       = @nesting_depth_to_go = (0);
12300         $ri_starting_one_line_block = [];
12301
12302         # The initialization code for the remaining batch arrays is as follows
12303         # and can be activated for testing.  But profiling shows that it is
12304         # time-consuming to re-initialize the batch arrays and is not necessary
12305         # because the maximum valid token, $max_index_to_go, is carefully
12306         # controlled.  This means however that it is not possible to do any
12307         # type of filter or map operation directly on these arrays.  And it is
12308         # not possible to use negative indexes. As a precaution against program
12309         # changes which might do this, sub pad_array_to_go adds some undefs at
12310         # the end of the current batch of data.
12311
12312         # So 'long story short': this is a waste of time
12313         0 && do { #<<<
12314         @block_type_to_go        = ();
12315         @type_sequence_to_go     = ();
12316         @forced_breakpoint_to_go = ();
12317         @token_lengths_to_go     = ();
12318         @levels_to_go            = ();
12319         @mate_index_to_go        = ();
12320         @ci_levels_to_go         = ();
12321         @nobreak_to_go           = ();
12322         @old_breakpoint_to_go    = ();
12323         @tokens_to_go            = ();
12324         @K_to_go                 = ();
12325         @types_to_go             = ();
12326         @leading_spaces_to_go    = ();
12327         @reduced_spaces_to_go    = ();
12328         @inext_to_go             = ();
12329         @iprev_to_go             = ();
12330         @parent_seqno_to_go      = ();
12331         };
12332
12333         $rbrace_follower = undef;
12334         $ending_in_quote = 0;
12335
12336         # These get re-initialized by calls to sub destroy_one_line_block():
12337         $index_start_one_line_block            = UNDEFINED_INDEX;
12338         $semicolons_before_block_self_destruct = 0;
12339
12340         # initialize forced breakpoint vars associated with each output batch
12341         $forced_breakpoint_count      = 0;
12342         $index_max_forced_break       = UNDEFINED_INDEX;
12343         $forced_breakpoint_undo_count = 0;
12344
12345         return;
12346     } ## end sub initialize_batch_variables
12347
12348     sub leading_spaces_to_go {
12349
12350         # return the number of indentation spaces for a token in the output
12351         # stream
12352
12353         my ($ii) = @_;
12354         return 0 if ( $ii < 0 );
12355         my $indentation = $leading_spaces_to_go[$ii];
12356         return ref($indentation) ? $indentation->get_spaces() : $indentation;
12357     } ## end sub leading_spaces_to_go
12358
12359     sub create_one_line_block {
12360         ( $index_start_one_line_block, $semicolons_before_block_self_destruct )
12361           = @_;
12362         return;
12363     }
12364
12365     sub destroy_one_line_block {
12366         $index_start_one_line_block            = UNDEFINED_INDEX;
12367         $semicolons_before_block_self_destruct = 0;
12368         return;
12369     }
12370
12371     # Routine to place the current token into the output stream.
12372     # Called once per output token.
12373
12374     use constant DEBUG_STORE => 0;
12375
12376     sub store_token_to_go {
12377
12378         my ( $self, $Ktoken_vars, $rtoken_vars ) = @_;
12379
12380         # Add one token to the next batch.
12381         #   $Ktoken_vars = the index K in the global token array
12382         #   $rtoken_vars = $rLL->[$Ktoken_vars] = the corresponding token values
12383         #                  unless they are temporarily being overridden
12384
12385         #------------------------------------------------------------------
12386         # NOTE: called once per token so coding efficiency is critical here
12387         #------------------------------------------------------------------
12388
12389         my $type = $rtoken_vars->[_TYPE_];
12390
12391         # Check for emergency flush...
12392         # The K indexes in the batch must always be a continuous sequence of
12393         # the global token array.  The batch process programming assumes this.
12394         # If storing this token would cause this relation to fail we must dump
12395         # the current batch before storing the new token.  It is extremely rare
12396         # for this to happen. One known example is the following two-line
12397         # snippet when run with parameters
12398         # --noadd-newlines  --space-terminal-semicolon:
12399         #    if ( $_ =~ /PENCIL/ ) { $pencil_flag= 1 } ; ;
12400         #    $yy=1;
12401         if ( $max_index_to_go >= 0 ) {
12402             if ( $Ktoken_vars != $K_to_go[$max_index_to_go] + 1 ) {
12403                 $self->flush_batch_of_CODE();
12404             }
12405
12406             # Do not output consecutive blank tokens ... this should not
12407             # happen, but it is worth checking.  Later code can then make the
12408             # simplifying assumption that blank tokens are not consecutive.
12409             elsif ( $type eq 'b' && $types_to_go[$max_index_to_go] eq 'b' ) {
12410
12411                 if (DEVEL_MODE) {
12412
12413                     # if this happens, it is may be that consecutive blanks
12414                     # were inserted into the token stream in 'respace_tokens'
12415                     my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
12416                     Fault("consecutive blanks near line $lno; please fix");
12417                 }
12418                 return;
12419             }
12420         }
12421
12422         # Do not start a batch with a blank token.
12423         # Fixes cases b149 b888 b984 b985 b986 b987
12424         else {
12425             if ( $type eq 'b' ) { return }
12426         }
12427
12428         #----------------------------
12429         # add this token to the batch
12430         #----------------------------
12431         $K_to_go[ ++$max_index_to_go ] = $Ktoken_vars;
12432         $types_to_go[$max_index_to_go] = $type;
12433
12434         $old_breakpoint_to_go[$max_index_to_go]    = 0;
12435         $forced_breakpoint_to_go[$max_index_to_go] = 0;
12436         $mate_index_to_go[$max_index_to_go]        = -1;
12437
12438         my $token = $tokens_to_go[$max_index_to_go] = $rtoken_vars->[_TOKEN_];
12439
12440         my $ci_level = $ci_levels_to_go[$max_index_to_go] =
12441           $rtoken_vars->[_CI_LEVEL_];
12442
12443         # Clip levels to zero if there are level errors in the file.
12444         # We had to wait until now for reasons explained in sub 'write_line'.
12445         my $level = $rtoken_vars->[_LEVEL_];
12446         if ( $level < 0 ) { $level = 0 }
12447         $levels_to_go[$max_index_to_go] = $level;
12448
12449         my $seqno = $type_sequence_to_go[$max_index_to_go] =
12450           $rtoken_vars->[_TYPE_SEQUENCE_];
12451
12452         my $in_continued_quote =
12453           ( $Ktoken_vars == $K_first ) && $line_of_tokens->{_starting_in_quote};
12454
12455         # Initializations for first token of new batch
12456         if ( $max_index_to_go == 0 ) {
12457
12458             $starting_in_quote = $in_continued_quote;
12459
12460             # Update the next parent sequence number for each new batch.
12461
12462             #----------------------------------------
12463             # Begin coding from sub parent_seqno_by_K
12464             #----------------------------------------
12465
12466             # The following is equivalent to this call but much faster:
12467             #    $next_parent_seqno = $self->parent_seqno_by_K($Ktoken_vars);
12468
12469             $next_parent_seqno = SEQ_ROOT;
12470             if ($seqno) {
12471                 $next_parent_seqno = $rparent_of_seqno->{$seqno};
12472             }
12473             else {
12474                 my $Kt = $rLL->[$Ktoken_vars]->[_KNEXT_SEQ_ITEM_];
12475                 if ( defined($Kt) ) {
12476                     my $type_sequence_t = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
12477                     my $type_t          = $rLL->[$Kt]->[_TYPE_];
12478
12479                     # if next container token is closing, it is the parent seqno
12480                     if ( $is_closing_type{$type_t} ) {
12481                         $next_parent_seqno = $type_sequence_t;
12482                     }
12483
12484                     # otherwise we want its parent container
12485                     else {
12486                         $next_parent_seqno =
12487                           $rparent_of_seqno->{$type_sequence_t};
12488                     }
12489                 }
12490             }
12491             $next_parent_seqno = SEQ_ROOT
12492               unless ( defined($next_parent_seqno) );
12493
12494             #--------------------------------------
12495             # End coding from sub parent_seqno_by_K
12496             #--------------------------------------
12497
12498             $next_slevel = $rdepth_of_opening_seqno->[$next_parent_seqno] + 1;
12499         }
12500
12501         # Initialize some sequence-dependent variables to their normal values
12502         $parent_seqno_to_go[$max_index_to_go]  = $next_parent_seqno;
12503         $nesting_depth_to_go[$max_index_to_go] = $next_slevel;
12504         $block_type_to_go[$max_index_to_go]    = EMPTY_STRING;
12505
12506         # Then fix them at container tokens:
12507         if ($seqno) {
12508
12509             $block_type_to_go[$max_index_to_go] =
12510               $rblock_type_of_seqno->{$seqno}
12511               if ( $rblock_type_of_seqno->{$seqno} );
12512
12513             if ( $is_opening_token{$token} ) {
12514
12515                 my $slevel = $rdepth_of_opening_seqno->[$seqno];
12516                 $nesting_depth_to_go[$max_index_to_go] = $slevel;
12517                 $next_slevel = $slevel + 1;
12518
12519                 $next_parent_seqno = $seqno;
12520
12521             }
12522             elsif ( $is_closing_token{$token} ) {
12523
12524                 $next_slevel = $rdepth_of_opening_seqno->[$seqno];
12525                 my $slevel = $next_slevel + 1;
12526                 $nesting_depth_to_go[$max_index_to_go] = $slevel;
12527
12528                 my $parent_seqno = $rparent_of_seqno->{$seqno};
12529                 $parent_seqno = SEQ_ROOT unless defined($parent_seqno);
12530                 $parent_seqno_to_go[$max_index_to_go] = $parent_seqno;
12531                 $next_parent_seqno                    = $parent_seqno;
12532
12533             }
12534             else {
12535                 # ternary token: nothing to do
12536             }
12537         }
12538
12539         $nobreak_to_go[$max_index_to_go] = $no_internal_newlines;
12540
12541         my $length = $rtoken_vars->[_TOKEN_LENGTH_];
12542
12543         # Safety check that length is defined. Should not be needed now.
12544         # Former patch for indent-only, in which the entire set of tokens is
12545         # turned into type 'q'. Lengths may have not been defined because sub
12546         # 'respace_tokens' is bypassed. We do not need lengths in this case,
12547         # but we will use the character count to have a defined value.  In the
12548         # future, it would be nicer to have 'respace_tokens' convert the lines
12549         # to quotes and get correct lengths.
12550         if ( !defined($length) ) {
12551             $length = length($token);
12552         }
12553
12554         $token_lengths_to_go[$max_index_to_go] = $length;
12555
12556         # We keep a running sum of token lengths from the start of this batch:
12557         #   summed_lengths_to_go[$i]   = total length to just before token $i
12558         #   summed_lengths_to_go[$i+1] = total length to just after token $i
12559         $summed_lengths_to_go[ $max_index_to_go + 1 ] =
12560           $summed_lengths_to_go[$max_index_to_go] + $length;
12561
12562         # Define the indentation that this token will have in two cases:
12563         # Without CI = reduced_spaces_to_go
12564         # With CI    = leading_spaces_to_go
12565         if ($in_continued_quote) {
12566             $leading_spaces_to_go[$max_index_to_go] = 0;
12567             $reduced_spaces_to_go[$max_index_to_go] = 0;
12568         }
12569         else {
12570             $leading_spaces_to_go[$max_index_to_go] =
12571               $reduced_spaces_to_go[$max_index_to_go] =
12572               $rOpts_indent_columns * $radjusted_levels->[$Ktoken_vars];
12573
12574             $leading_spaces_to_go[$max_index_to_go] +=
12575               $rOpts_continuation_indentation * $ci_level
12576               if ($ci_level);
12577         }
12578
12579         DEBUG_STORE && do {
12580             my ( $a, $b, $c ) = caller();
12581             print STDOUT
12582 "STORE: from $a $c: storing token $token type $type lev=$level at $max_index_to_go\n";
12583         };
12584         return;
12585     } ## end sub store_token_to_go
12586
12587     sub flush_batch_of_CODE {
12588
12589         # Finish any batch packaging and call the process routine.
12590         # This must be the only call to grind_batch_of_CODE()
12591         my ($self) = @_;
12592
12593         if ( $max_index_to_go >= 0 ) {
12594
12595             # Create an array to hold variables for this batch
12596             my $this_batch = [];
12597
12598             $this_batch->[_starting_in_quote_] = 1 if ($starting_in_quote);
12599             $this_batch->[_ending_in_quote_]   = 1 if ($ending_in_quote);
12600
12601             if ( $CODE_type || $last_CODE_type ) {
12602                 $this_batch->[_batch_CODE_type_] =
12603                     $K_to_go[$max_index_to_go] >= $K_first
12604                   ? $CODE_type
12605                   : $last_CODE_type;
12606             }
12607
12608             $last_line_had_side_comment =
12609               ( $max_index_to_go > 0 && $types_to_go[$max_index_to_go] eq '#' );
12610
12611             # The flag $is_static_block_comment applies to the line which just
12612             # arrived. So it only applies if we are outputting that line.
12613             if ( $is_static_block_comment && !$last_line_had_side_comment ) {
12614                 $this_batch->[_is_static_block_comment_] =
12615                   $K_to_go[0] == $K_first;
12616             }
12617
12618             $this_batch->[_ri_starting_one_line_block_] =
12619               $ri_starting_one_line_block;
12620
12621             $self->[_this_batch_] = $this_batch;
12622
12623             $self->grind_batch_of_CODE();
12624
12625             # Done .. this batch is history
12626             $self->[_this_batch_] = undef;
12627
12628             initialize_batch_variables();
12629         }
12630
12631         return;
12632     } ## end sub flush_batch_of_CODE
12633
12634     sub end_batch {
12635
12636         # end the current batch, EXCEPT for a few special cases
12637         my ($self) = @_;
12638
12639         if ( $max_index_to_go < 0 ) {
12640
12641             # This is harmless but should be eliminated in development
12642             if (DEVEL_MODE) {
12643                 Fault("End batch called with nothing to do; please fix\n");
12644             }
12645             return;
12646         }
12647
12648         # Exceptions when a line does not end with a comment... (fixes c058)
12649         if ( $types_to_go[$max_index_to_go] ne '#' ) {
12650
12651             # Exception 1: Do not end line in a weld
12652             return
12653               if ( $total_weld_count
12654                 && $self->[_rK_weld_right_]->{ $K_to_go[$max_index_to_go] } );
12655
12656             # Exception 2: just set a tentative breakpoint if we might be in a
12657             # one-line block
12658             if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
12659                 $self->set_forced_breakpoint($max_index_to_go);
12660                 return;
12661             }
12662         }
12663
12664         $self->flush_batch_of_CODE();
12665         return;
12666     } ## end sub end_batch
12667
12668     sub flush_vertical_aligner {
12669         my ($self) = @_;
12670         my $vao = $self->[_vertical_aligner_object_];
12671         $vao->flush();
12672         return;
12673     }
12674
12675     # flush is called to output any tokens in the pipeline, so that
12676     # an alternate source of lines can be written in the correct order
12677     sub flush {
12678         my ( $self, $CODE_type_flush ) = @_;
12679
12680         # end the current batch with 1 exception
12681
12682         destroy_one_line_block();
12683
12684         # Exception: if we are flushing within the code stream only to insert
12685         # blank line(s), then we can keep the batch intact at a weld. This
12686         # improves formatting of -ce.  See test 'ce1.ce'
12687         if ( $CODE_type_flush && $CODE_type_flush eq 'BL' ) {
12688             $self->end_batch() if ( $max_index_to_go >= 0 );
12689         }
12690
12691         # otherwise, we have to shut things down completely.
12692         else { $self->flush_batch_of_CODE() }
12693
12694         $self->flush_vertical_aligner();
12695         return;
12696     } ## end sub flush
12697
12698     sub process_line_of_CODE {
12699
12700         my ( $self, $my_line_of_tokens ) = @_;
12701
12702         #----------------------------------------------------------------
12703         # This routine is called once per INPUT line to format all of the
12704         # tokens on that line.
12705         #----------------------------------------------------------------
12706
12707         # It outputs full-line comments and blank lines immediately.
12708
12709         # The tokens are copied one-by-one from the global token array $rLL to
12710         # a set of '_to_go' arrays which collect batches of tokens for a
12711         # further processing via calls to 'sub store_token_to_go', until a well
12712         # defined 'structural' break point* or 'forced' breakpoint* is reached.
12713         # Then, the batch of collected '_to_go' tokens is passed along to 'sub
12714         # grind_batch_of_CODE' for further processing.
12715
12716         # * 'structural' break points are basically line breaks corresponding
12717         # to code blocks.  An example is a chain of if-elsif-else statements,
12718         # which should typically be broken at the opening and closing braces.
12719
12720         # * 'forced' break points are breaks required by side comments or by
12721         # special user controls.
12722
12723         # So this routine is just making an initial set of required line
12724         # breaks, basically regardless of the maximum requested line length.
12725         # The subsequent stage of formatting make additional line breaks
12726         # appropriate for lists and logical structures, and to keep line
12727         # lengths below the requested maximum line length.
12728
12729         #-----------------------------------
12730         # begin initialize closure variables
12731         #-----------------------------------
12732         $line_of_tokens = $my_line_of_tokens;
12733         my $rK_range = $line_of_tokens->{_rK_range};
12734         if ( !defined( $rK_range->[0] ) ) {
12735
12736             # Empty line: This can happen if tokens are deleted, for example
12737             # with the -mangle parameter
12738             return;
12739         }
12740
12741         ( $K_first, $K_last ) = @{$rK_range};
12742         $last_CODE_type = $CODE_type;
12743         $CODE_type      = $line_of_tokens->{_code_type};
12744
12745         $rLL                     = $self->[_rLL_];
12746         $radjusted_levels        = $self->[_radjusted_levels_];
12747         $rparent_of_seqno        = $self->[_rparent_of_seqno_];
12748         $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
12749         $rblock_type_of_seqno    = $self->[_rblock_type_of_seqno_];
12750
12751         #---------------------------------
12752         # end initialize closure variables
12753         #---------------------------------
12754
12755         # This flag will become nobreak_to_go and should be set to 2 to prevent
12756         # a line break AFTER the current token.
12757         $no_internal_newlines = 0;
12758         if ( !$rOpts_add_newlines || $CODE_type eq 'NIN' ) {
12759             $no_internal_newlines = 2;
12760         }
12761
12762         my $input_line = $line_of_tokens->{_line_text};
12763
12764         my ( $is_block_comment, $has_side_comment );
12765         if ( $rLL->[$K_last]->[_TYPE_] eq '#' ) {
12766             if   ( $K_last == $K_first ) { $is_block_comment = 1 }
12767             else                         { $has_side_comment = 1 }
12768         }
12769
12770         my $is_static_block_comment_without_leading_space =
12771           $CODE_type eq 'SBCX';
12772         $is_static_block_comment =
12773           $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
12774
12775         # check for a $VERSION statement
12776         if ( $CODE_type eq 'VER' ) {
12777             $self->[_saw_VERSION_in_this_file_] = 1;
12778             $no_internal_newlines = 2;
12779         }
12780
12781         # Add interline blank if any
12782         my $last_old_nonblank_type   = "b";
12783         my $first_new_nonblank_token = EMPTY_STRING;
12784         my $K_first_true             = $K_first;
12785         if ( $max_index_to_go >= 0 ) {
12786             $last_old_nonblank_type   = $types_to_go[$max_index_to_go];
12787             $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
12788             if (  !$is_block_comment
12789                 && $types_to_go[$max_index_to_go] ne 'b'
12790                 && $K_first > 0
12791                 && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
12792             {
12793                 $K_first -= 1;
12794             }
12795         }
12796
12797         my $rtok_first = $rLL->[$K_first];
12798
12799         my $in_quote = $line_of_tokens->{_ending_in_quote};
12800         $ending_in_quote = $in_quote;
12801
12802         #------------------------------------
12803         # Handle a block (full-line) comment.
12804         #------------------------------------
12805         if ($is_block_comment) {
12806
12807             if ( $rOpts->{'delete-block-comments'} ) {
12808                 $self->flush();
12809                 return;
12810             }
12811
12812             destroy_one_line_block();
12813             $self->end_batch() if ( $max_index_to_go >= 0 );
12814
12815             # output a blank line before block comments
12816             if (
12817                 # unless we follow a blank or comment line
12818                 $self->[_last_line_leading_type_] ne '#'
12819                 && $self->[_last_line_leading_type_] ne 'b'
12820
12821                 # only if allowed
12822                 && $rOpts->{'blanks-before-comments'}
12823
12824                 # if this is NOT an empty comment, unless it follows a side
12825                 # comment and could become a hanging side comment.
12826                 && (
12827                     $rtok_first->[_TOKEN_] ne '#'
12828                     || (   $last_line_had_side_comment
12829                         && $rLL->[$K_first]->[_LEVEL_] > 0 )
12830                 )
12831
12832                 # not after a short line ending in an opening token
12833                 # because we already have space above this comment.
12834                 # Note that the first comment in this if block, after
12835                 # the 'if (', does not get a blank line because of this.
12836                 && !$self->[_last_output_short_opening_token_]
12837
12838                 # never before static block comments
12839                 && !$is_static_block_comment
12840               )
12841             {
12842                 $self->flush();    # switching to new output stream
12843                 my $file_writer_object = $self->[_file_writer_object_];
12844                 $file_writer_object->write_blank_code_line();
12845                 $self->[_last_line_leading_type_] = 'b';
12846             }
12847
12848             if (
12849                 $rOpts->{'indent-block-comments'}
12850                 && (  !$rOpts->{'indent-spaced-block-comments'}
12851                     || $input_line =~ /^\s+/ )
12852                 && !$is_static_block_comment_without_leading_space
12853               )
12854             {
12855                 my $Ktoken_vars = $K_first;
12856                 my $rtoken_vars = $rLL->[$Ktoken_vars];
12857                 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
12858                 $self->end_batch();
12859             }
12860             else {
12861
12862                 # switching to new output stream
12863                 $self->flush();
12864
12865                 # Note that last arg in call here is 'undef' for comments
12866                 my $file_writer_object = $self->[_file_writer_object_];
12867                 $file_writer_object->write_code_line(
12868                     $rtok_first->[_TOKEN_] . "\n", undef );
12869                 $self->[_last_line_leading_type_] = '#';
12870             }
12871             return;
12872         }
12873
12874         # Compare input/output indentation except for:
12875         #  - hanging side comments
12876         #  - continuation lines (have unknown amount of initial blank space)
12877         #  - and lines which are quotes (because they may have been outdented)
12878         my $guessed_indentation_level =
12879           $line_of_tokens->{_guessed_indentation_level};
12880
12881         unless ( $CODE_type eq 'HSC'
12882             || $rtok_first->[_CI_LEVEL_] > 0
12883             || $guessed_indentation_level == 0 && $rtok_first->[_TYPE_] eq 'Q' )
12884         {
12885             my $input_line_number = $line_of_tokens->{_line_number};
12886             $self->compare_indentation_levels( $K_first,
12887                 $guessed_indentation_level, $input_line_number );
12888         }
12889
12890         #------------------------
12891         # Handle indentation-only
12892         #------------------------
12893
12894         # NOTE: In previous versions we sent all qw lines out immediately here.
12895         # No longer doing this: also write a line which is entirely a 'qw' list
12896         # to allow stacking of opening and closing tokens.  Note that interior
12897         # qw lines will still go out at the end of this routine.
12898         if ( $CODE_type eq 'IO' ) {
12899             $self->flush();
12900             my $line = $input_line;
12901
12902             # Fix for rt #125506 Unexpected string formating
12903             # in which leading space of a terminal quote was removed
12904             $line =~ s/\s+$//;
12905             $line =~ s/^\s+// unless ( $line_of_tokens->{_starting_in_quote} );
12906
12907             my $Ktoken_vars = $K_first;
12908
12909             # We work with a copy of the token variables and change the
12910             # first token to be the entire line as a quote variable
12911             my $rtoken_vars = $rLL->[$Ktoken_vars];
12912             $rtoken_vars = copy_token_as_type( $rtoken_vars, 'q', $line );
12913
12914             # Patch: length is not really important here
12915             $rtoken_vars->[_TOKEN_LENGTH_] = length($line);
12916
12917             $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
12918             $self->end_batch();
12919             return;
12920         }
12921
12922         #---------------------------
12923         # Handle all other lines ...
12924         #---------------------------
12925
12926         # If we just saw the end of an elsif block, write nag message
12927         # if we do not see another elseif or an else.
12928         if ($looking_for_else) {
12929
12930             ##     /^(elsif|else)$/
12931             if ( !$is_elsif_else{ $rLL->[$K_first_true]->[_TOKEN_] } ) {
12932                 write_logfile_entry("(No else block)\n");
12933             }
12934             $looking_for_else = 0;
12935         }
12936
12937         # This is a good place to kill incomplete one-line blocks
12938         if ( $max_index_to_go >= 0 ) {
12939             if (
12940                 (
12941                        ( $semicolons_before_block_self_destruct == 0 )
12942                     && ( $last_old_nonblank_type eq ';' )
12943                     && ( $first_new_nonblank_token ne '}' )
12944                 )
12945
12946                 # Patch for RT #98902. Honor request to break at old commas.
12947                 || (   $rOpts_break_at_old_comma_breakpoints
12948                     && $last_old_nonblank_type eq ',' )
12949               )
12950             {
12951                 $forced_breakpoint_to_go[$max_index_to_go] = 1
12952                   if ($rOpts_break_at_old_comma_breakpoints);
12953                 destroy_one_line_block();
12954                 $self->end_batch();
12955             }
12956
12957             # Keep any requested breaks before this line.  Note that we have to
12958             # use the original K_first because it may have been reduced above
12959             # to add a blank.  The value of the flag is as follows:
12960             #   1 => hard break, flush the batch
12961             #   2 => soft break, set breakpoint and continue building the batch
12962             if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} ) {
12963                 destroy_one_line_block();
12964                 if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} == 2 ) {
12965                     $self->set_forced_breakpoint($max_index_to_go);
12966                 }
12967                 else {
12968                     $self->end_batch() if ( $max_index_to_go >= 0 );
12969                 }
12970             }
12971         }
12972
12973         #--------------------------------------
12974         # loop to process the tokens one-by-one
12975         #--------------------------------------
12976
12977         # We do not want a leading blank if the previous batch just got output
12978
12979         if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
12980             $K_first++;
12981         }
12982
12983         foreach my $Ktoken_vars ( $K_first .. $K_last ) {
12984
12985             my $rtoken_vars = $rLL->[$Ktoken_vars];
12986
12987             #--------------
12988             # handle blanks
12989             #--------------
12990             if ( $rtoken_vars->[_TYPE_] eq 'b' ) {
12991                 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
12992                 next;
12993             }
12994
12995             #------------------
12996             # handle non-blanks
12997             #------------------
12998             my $type = $rtoken_vars->[_TYPE_];
12999
13000             # If we are continuing after seeing a right curly brace, flush
13001             # buffer unless we see what we are looking for, as in
13002             #   } else ...
13003             if ($rbrace_follower) {
13004                 my $token = $rtoken_vars->[_TOKEN_];
13005                 unless ( $rbrace_follower->{$token} ) {
13006                     $self->end_batch() if ( $max_index_to_go >= 0 );
13007                 }
13008                 $rbrace_follower = undef;
13009             }
13010
13011             my (
13012                 $block_type,       $type_sequence,
13013                 $is_opening_BLOCK, $is_closing_BLOCK,
13014                 $nobreak_BEFORE_BLOCK
13015             );
13016
13017             if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) {
13018
13019                 my $token = $rtoken_vars->[_TOKEN_];
13020                 $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
13021                 $block_type    = $rblock_type_of_seqno->{$type_sequence};
13022
13023                 if (   $block_type
13024                     && $token eq $type
13025                     && $block_type ne 't'
13026                     && !$self->[_rshort_nested_]->{$type_sequence} )
13027                 {
13028
13029                     if ( $type eq '{' ) {
13030                         $is_opening_BLOCK     = 1;
13031                         $nobreak_BEFORE_BLOCK = $no_internal_newlines;
13032                     }
13033                     elsif ( $type eq '}' ) {
13034                         $is_closing_BLOCK     = 1;
13035                         $nobreak_BEFORE_BLOCK = $no_internal_newlines;
13036                     }
13037                 }
13038             }
13039
13040             # if at last token ...
13041             if ( $Ktoken_vars == $K_last ) {
13042
13043                 #---------------------
13044                 # handle side comments
13045                 #---------------------
13046                 if ($has_side_comment) {
13047                     $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
13048                     next;
13049                 }
13050             }
13051
13052             # if before last token ... do not allow breaks which would promote
13053             # a side comment to a block comment
13054             elsif (
13055                 $has_side_comment
13056                 && (   $Ktoken_vars == $K_last - 1
13057                     || $Ktoken_vars == $K_last - 2
13058                     && $rLL->[ $K_last - 1 ]->[_TYPE_] eq 'b' )
13059               )
13060             {
13061                 $no_internal_newlines = 2;
13062             }
13063
13064             # Process non-blank and non-comment tokens ...
13065
13066             #-----------------
13067             # handle semicolon
13068             #-----------------
13069             if ( $type eq ';' ) {
13070
13071                 my $next_nonblank_token_type = 'b';
13072                 my $next_nonblank_token      = EMPTY_STRING;
13073                 if ( $Ktoken_vars < $K_last ) {
13074                     my $Knnb = $Ktoken_vars + 1;
13075                     $Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' );
13076                     $next_nonblank_token      = $rLL->[$Knnb]->[_TOKEN_];
13077                     $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
13078                 }
13079
13080                 my $break_before_semicolon = ( $Ktoken_vars == $K_first )
13081                   && $rOpts_break_at_old_semicolon_breakpoints;
13082
13083                 # kill one-line blocks with too many semicolons
13084                 $semicolons_before_block_self_destruct--;
13085                 if (
13086                        $break_before_semicolon
13087                     || ( $semicolons_before_block_self_destruct < 0 )
13088                     || (   $semicolons_before_block_self_destruct == 0
13089                         && $next_nonblank_token_type !~ /^[b\}]$/ )
13090                   )
13091                 {
13092                     destroy_one_line_block();
13093                     $self->end_batch()
13094                       if ( $break_before_semicolon
13095                         && $max_index_to_go >= 0 );
13096                 }
13097
13098                 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
13099
13100                 $self->end_batch()
13101                   unless (
13102                     $no_internal_newlines
13103                     || (   $rOpts_keep_interior_semicolons
13104                         && $Ktoken_vars < $K_last )
13105                     || ( $next_nonblank_token eq '}' )
13106                   );
13107             }
13108
13109             #-----------
13110             # handle '{'
13111             #-----------
13112             elsif ($is_opening_BLOCK) {
13113
13114                 # Tentatively output this token.  This is required before
13115                 # calling starting_one_line_block.  We may have to unstore
13116                 # it, though, if we have to break before it.
13117                 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
13118
13119                 # Look ahead to see if we might form a one-line block..
13120                 my $too_long =
13121                   $self->starting_one_line_block( $Ktoken_vars,
13122                     $K_last_nonblank_code, $K_last );
13123                 $self->clear_breakpoint_undo_stack();
13124
13125                 # to simplify the logic below, set a flag to indicate if
13126                 # this opening brace is far from the keyword which introduces it
13127                 my $keyword_on_same_line = 1;
13128                 if (
13129                        $max_index_to_go >= 0
13130                     && defined($K_last_nonblank_code)
13131                     && $rLL->[$K_last_nonblank_code]->[_TYPE_] eq ')'
13132                     && ( ( $rtoken_vars->[_LEVEL_] < $levels_to_go[0] )
13133                         || $too_long )
13134                   )
13135                 {
13136                     $keyword_on_same_line = 0;
13137                 }
13138
13139                 # Break before '{' if requested with -bl or -bli flag
13140                 my $want_break = $self->[_rbrace_left_]->{$type_sequence};
13141
13142                 # But do not break if this token is welded to the left
13143                 if ( $total_weld_count
13144                     && defined( $self->[_rK_weld_left_]->{$Ktoken_vars} ) )
13145                 {
13146                     $want_break = 0;
13147                 }
13148
13149                 # Break BEFORE an opening '{' ...
13150                 if (
13151
13152                     # if requested
13153                     $want_break
13154
13155                     # and we were unable to start looking for a block,
13156                     && $index_start_one_line_block == UNDEFINED_INDEX
13157
13158                     # or if it will not be on same line as its keyword, so that
13159                     # it will be outdented (eval.t, overload.t), and the user
13160                     # has not insisted on keeping it on the right
13161                     || (   !$keyword_on_same_line
13162                         && !$rOpts_opening_brace_always_on_right )
13163                   )
13164                 {
13165
13166                     # but only if allowed
13167                     unless ($nobreak_BEFORE_BLOCK) {
13168
13169                         # since we already stored this token, we must unstore it
13170                         $self->unstore_token_to_go();
13171
13172                         # then output the line
13173                         $self->end_batch() if ( $max_index_to_go >= 0 );
13174
13175                         # and now store this token at the start of a new line
13176                         $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
13177                     }
13178                 }
13179
13180                 # now output this line
13181                 $self->end_batch()
13182                   if ( $max_index_to_go >= 0 && !$no_internal_newlines );
13183             }
13184
13185             #-----------
13186             # handle '}'
13187             #-----------
13188             elsif ($is_closing_BLOCK) {
13189
13190                 my $next_nonblank_token_type = 'b';
13191                 my $next_nonblank_token      = EMPTY_STRING;
13192                 my $Knnb;
13193                 if ( $Ktoken_vars < $K_last ) {
13194                     $Knnb = $Ktoken_vars + 1;
13195                     $Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' );
13196                     $next_nonblank_token      = $rLL->[$Knnb]->[_TOKEN_];
13197                     $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
13198                 }
13199
13200                 # If there is a pending one-line block ..
13201                 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
13202
13203                     # Fix for b1208: if a side comment follows this closing
13204                     # brace then we must include its length in the length test
13205                     # ... unless the -issl flag is set (fixes b1307-1309).
13206                     # Assume a minimum of 1 blank space to the comment.
13207                     my $added_length = 0;
13208                     if (   $has_side_comment
13209                         && !$rOpts_ignore_side_comment_lengths
13210                         && $next_nonblank_token_type eq '#' )
13211                     {
13212                         $added_length = 1 + $rLL->[$K_last]->[_TOKEN_LENGTH_];
13213                     }
13214
13215                     # we have to terminate it if..
13216                     if (
13217
13218                         # it is too long (final length may be different from
13219                         # initial estimate). note: must allow 1 space for this
13220                         # token
13221                         $self->excess_line_length( $index_start_one_line_block,
13222                             $max_index_to_go ) + $added_length >= 0
13223
13224                         # or if it has too many semicolons
13225                         || (   $semicolons_before_block_self_destruct == 0
13226                             && defined($K_last_nonblank_code)
13227                             && $rLL->[$K_last_nonblank_code]->[_TYPE_] ne ';' )
13228                       )
13229                     {
13230                         destroy_one_line_block();
13231                     }
13232                 }
13233
13234                 # put a break before this closing curly brace if appropriate
13235                 $self->end_batch()
13236                   if ( $max_index_to_go >= 0
13237                     && !$nobreak_BEFORE_BLOCK
13238                     && $index_start_one_line_block == UNDEFINED_INDEX );
13239
13240                 # store the closing curly brace
13241                 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
13242
13243                 # ok, we just stored a closing curly brace.  Often, but
13244                 # not always, we want to end the line immediately.
13245                 # So now we have to check for special cases.
13246
13247                 # if this '}' successfully ends a one-line block..
13248                 my $is_one_line_block = 0;
13249                 my $keep_going        = 0;
13250                 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
13251
13252                     # Remember the type of token just before the
13253                     # opening brace.  It would be more general to use
13254                     # a stack, but this will work for one-line blocks.
13255                     $is_one_line_block =
13256                       $types_to_go[$index_start_one_line_block];
13257
13258                     # we have to actually make it by removing tentative
13259                     # breaks that were set within it
13260                     $self->undo_forced_breakpoint_stack(0);
13261
13262                     # For -lp, extend the nobreak to include a trailing
13263                     # terminal ','.  This is because the -lp indentation was
13264                     # not known when making one-line blocks, so we may be able
13265                     # to move the line back to fit.  Otherwise we may create a
13266                     # needlessly stranded comma on the next line.
13267                     my $iend_nobreak = $max_index_to_go - 1;
13268                     if (   $rOpts_line_up_parentheses
13269                         && $next_nonblank_token_type eq ','
13270                         && $Knnb eq $K_last )
13271                     {
13272                         my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
13273                         my $is_excluded =
13274                           $self->[_ris_excluded_lp_container_]->{$p_seqno};
13275                         $iend_nobreak = $max_index_to_go if ( !$is_excluded );
13276                     }
13277
13278                     $self->set_nobreaks( $index_start_one_line_block,
13279                         $iend_nobreak );
13280
13281                     # save starting block indexes so that sub correct_lp can
13282                     # check and adjust -lp indentation (c098)
13283                     push @{$ri_starting_one_line_block},
13284                       $index_start_one_line_block;
13285
13286                     # then re-initialize for the next one-line block
13287                     destroy_one_line_block();
13288
13289                     # then decide if we want to break after the '}' ..
13290                     # We will keep going to allow certain brace followers as in:
13291                     #   do { $ifclosed = 1; last } unless $losing;
13292                     #
13293                     # But make a line break if the curly ends a
13294                     # significant block:
13295                     if (
13296                         (
13297                             $is_block_without_semicolon{$block_type}
13298
13299                             # Follow users break point for
13300                             # one line block types U & G, such as a 'try' block
13301                             || $is_one_line_block =~ /^[UG]$/
13302                             && $Ktoken_vars == $K_last
13303                         )
13304
13305                         # if needless semicolon follows we handle it later
13306                         && $next_nonblank_token ne ';'
13307                       )
13308                     {
13309                         $self->end_batch()
13310                           unless ($no_internal_newlines);
13311                     }
13312                 }
13313
13314                 # set string indicating what we need to look for brace follower
13315                 # tokens
13316                 if ( $is_if_unless_elsif_else{$block_type} ) {
13317                     $rbrace_follower = undef;
13318                 }
13319                 elsif ( $block_type eq 'do' ) {
13320                     $rbrace_follower = \%is_do_follower;
13321                     if (
13322                         $self->tight_paren_follows( $K_to_go[0], $Ktoken_vars )
13323                       )
13324                     {
13325                         $rbrace_follower = { ')' => 1 };
13326                     }
13327                 }
13328
13329                 # added eval for borris.t
13330                 elsif ($is_sort_map_grep_eval{$block_type}
13331                     || $is_one_line_block eq 'G' )
13332                 {
13333                     $rbrace_follower = undef;
13334                     $keep_going      = 1;
13335                 }
13336
13337                 # anonymous sub
13338                 elsif ( $self->[_ris_asub_block_]->{$type_sequence} ) {
13339                     if ($is_one_line_block) {
13340
13341                         $rbrace_follower = \%is_anon_sub_1_brace_follower;
13342
13343                         # Exceptions to help keep -lp intact, see git #74 ...
13344                         # Exception 1: followed by '}' on this line
13345                         if (   $Ktoken_vars < $K_last
13346                             && $next_nonblank_token eq '}' )
13347                         {
13348                             $rbrace_follower = undef;
13349                             $keep_going      = 1;
13350                         }
13351
13352                         # Exception 2: followed by '}' on next line if -lp set.
13353                         # The -lp requirement allows the formatting to follow
13354                         # old breaks when -lp is not used, minimizing changes.
13355                         # Fixes issue c087.
13356                         elsif ($Ktoken_vars == $K_last
13357                             && $rOpts_line_up_parentheses )
13358                         {
13359                             my $K_closing_container =
13360                               $self->[_K_closing_container_];
13361                             my $K_opening_container =
13362                               $self->[_K_opening_container_];
13363                             my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
13364                             my $Kc      = $K_closing_container->{$p_seqno};
13365                             my $is_excluded =
13366                               $self->[_ris_excluded_lp_container_]->{$p_seqno};
13367                             if (   defined($Kc)
13368                                 && $rLL->[$Kc]->[_TOKEN_] eq '}'
13369                                 && !$is_excluded
13370                                 && $Kc - $Ktoken_vars <= 2 )
13371                             {
13372                                 $rbrace_follower = undef;
13373                                 $keep_going      = 1;
13374                             }
13375                         }
13376                     }
13377                     else {
13378                         $rbrace_follower = \%is_anon_sub_brace_follower;
13379                     }
13380                 }
13381
13382                 # None of the above: specify what can follow a closing
13383                 # brace of a block which is not an
13384                 # if/elsif/else/do/sort/map/grep/eval
13385                 # Testfiles:
13386                 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
13387                 else {
13388                     $rbrace_follower = \%is_other_brace_follower;
13389                 }
13390
13391                 # See if an elsif block is followed by another elsif or else;
13392                 # complain if not.
13393                 if ( $block_type eq 'elsif' ) {
13394
13395                     if ( $next_nonblank_token_type eq 'b' ) {    # end of line?
13396                         $looking_for_else = 1;    # ok, check on next line
13397                     }
13398                     else {
13399                         ##    /^(elsif|else)$/
13400                         if ( !$is_elsif_else{$next_nonblank_token} ) {
13401                             write_logfile_entry("No else block :(\n");
13402                         }
13403                     }
13404                 }
13405
13406                 # keep going after certain block types (map,sort,grep,eval)
13407                 # added eval for borris.t
13408                 if ($keep_going) {
13409
13410                     # keep going
13411                 }
13412
13413                 # if no more tokens, postpone decision until re-entering
13414                 elsif ( ( $next_nonblank_token_type eq 'b' )
13415                     && $rOpts_add_newlines )
13416                 {
13417                     unless ($rbrace_follower) {
13418                         $self->end_batch()
13419                           unless ( $no_internal_newlines
13420                             || $max_index_to_go < 0 );
13421                     }
13422                 }
13423                 elsif ($rbrace_follower) {
13424
13425                     unless ( $rbrace_follower->{$next_nonblank_token} ) {
13426                         $self->end_batch()
13427                           unless ( $no_internal_newlines
13428                             || $max_index_to_go < 0 );
13429                     }
13430                     $rbrace_follower = undef;
13431                 }
13432
13433                 else {
13434                     $self->end_batch()
13435                       unless ( $no_internal_newlines
13436                         || $max_index_to_go < 0 );
13437                 }
13438
13439             } ## end treatment of closing block token
13440
13441             #------------------------------
13442             # handle here_doc target string
13443             #------------------------------
13444             elsif ( $type eq 'h' ) {
13445
13446                 # no newlines after seeing here-target
13447                 $no_internal_newlines = 2;
13448                 ## destroy_one_line_block();  # deleted to fix case b529
13449                 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
13450             }
13451
13452             #-----------------------------
13453             # handle all other token types
13454             #-----------------------------
13455             else {
13456
13457                 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
13458
13459                 # break after a label if requested
13460                 if (   $rOpts_break_after_labels
13461                     && $type eq 'J'
13462                     && $rOpts_break_after_labels == 1 )
13463                 {
13464                     $self->end_batch()
13465                       unless ($no_internal_newlines);
13466                 }
13467             }
13468
13469             # remember previous nonblank, non-comment OUTPUT token
13470             $K_last_nonblank_code = $Ktoken_vars;
13471
13472         } ## end of loop over all tokens in this line
13473
13474         # if there is anything left in the output buffer ...
13475         if ( $max_index_to_go >= 0 ) {
13476
13477             my $type       = $rLL->[$K_last]->[_TYPE_];
13478             my $break_flag = $self->[_rbreak_after_Klast_]->{$K_last};
13479
13480             # we have to flush ..
13481             if (
13482
13483                 # if there is a side comment...
13484                 $type eq '#'
13485
13486                 # if this line ends in a quote
13487                 # NOTE: This is critically important for insuring that quoted
13488                 # lines do not get processed by things like -sot and -sct
13489                 || $in_quote
13490
13491                 # if this is a VERSION statement
13492                 || $CODE_type eq 'VER'
13493
13494                 # to keep a label at the end of a line
13495                 || ( $type eq 'J' && $rOpts_break_after_labels != 2 )
13496
13497                 # if we have a hard break request
13498                 || $break_flag && $break_flag != 2
13499
13500                 # if we are instructed to keep all old line breaks
13501                 || !$rOpts->{'delete-old-newlines'}
13502
13503                 # if this is a line of the form 'use overload'. A break here in
13504                 # the input file is a good break because it will allow the
13505                 # operators which follow to be formatted well. Without this
13506                 # break the formatting with -ci=4 -xci is poor, for example.
13507
13508                 #   use overload
13509                 #     '+' => sub {
13510                 #       print length $_[2], "\n";
13511                 #       my ( $x, $y ) = _order(@_);
13512                 #       Number::Roman->new( int $x + $y );
13513                 #     },
13514                 #     '-' => sub {
13515                 #       my ( $x, $y ) = _order(@_);
13516                 #       Number::Roman->new( int $x - $y );
13517                 #     };
13518                 || (   $max_index_to_go == 2
13519                     && $types_to_go[0] eq 'k'
13520                     && $tokens_to_go[0] eq 'use'
13521                     && $tokens_to_go[$max_index_to_go] eq 'overload' )
13522               )
13523             {
13524                 destroy_one_line_block();
13525                 $self->end_batch();
13526             }
13527
13528             else {
13529
13530                 # Check for a soft break request
13531                 if ( $break_flag && $break_flag == 2 ) {
13532                     $self->set_forced_breakpoint($max_index_to_go);
13533                 }
13534
13535                 # mark old line breakpoints in current output stream
13536                 if (  !$rOpts_ignore_old_breakpoints
13537                     || $self->[_ris_essential_old_breakpoint_]->{$K_last} )
13538                 {
13539                     my $jobp = $max_index_to_go;
13540                     if (   $types_to_go[$max_index_to_go] eq 'b'
13541                         && $max_index_to_go > 0 )
13542                     {
13543                         $jobp--;
13544                     }
13545                     $old_breakpoint_to_go[$jobp] = 1;
13546                 }
13547             }
13548         }
13549
13550         return;
13551     } ## end sub process_line_of_CODE
13552 } ## end closure process_line_of_CODE
13553
13554 sub tight_paren_follows {
13555
13556     my ( $self, $K_to_go_0, $K_ic ) = @_;
13557
13558     # Input parameters:
13559     #   $K_to_go_0 = first token index K of this output batch (=K_to_go[0])
13560     #   $K_ic = index of the closing do brace (=K_to_go[$max_index_to_go])
13561     # Return parameter:
13562     #   false if we want a break after the closing do brace
13563     #   true if we do not want a break after the closing do brace
13564
13565     # We are at the closing brace of a 'do' block.  See if this brace is
13566     # followed by a closing paren, and if so, set a flag which indicates
13567     # that we do not want a line break between the '}' and ')'.
13568
13569     # xxxxx ( ...... do {  ... } ) {
13570     #                          ^-------looking at this brace, K_ic
13571
13572     # Subscript notation:
13573     # _i = inner container (braces in this case)
13574     # _o = outer container (parens in this case)
13575     # _io = inner opening = '{'
13576     # _ic = inner closing = '}'
13577     # _oo = outer opening = '('
13578     # _oc = outer closing = ')'
13579
13580     #       |--K_oo                 |--K_oc  = outer container
13581     # xxxxx ( ...... do {  ...... } ) {
13582     #                   |--K_io   |--K_ic    = inner container
13583
13584     # In general, the safe thing to do is return a 'false' value
13585     # if the statement appears to be complex.  This will have
13586     # the downstream side-effect of opening up outer containers
13587     # to help make complex code readable.  But for simpler
13588     # do blocks it can be preferable to keep the code compact
13589     # by returning a 'true' value.
13590
13591     return unless defined($K_ic);
13592     my $rLL = $self->[_rLL_];
13593
13594     # we should only be called at a closing block
13595     my $seqno_i = $rLL->[$K_ic]->[_TYPE_SEQUENCE_];
13596     return unless ($seqno_i);    # shouldn't happen;
13597
13598     # This only applies if the next nonblank is a ')'
13599     my $K_oc = $self->K_next_nonblank($K_ic);
13600     return unless defined($K_oc);
13601     my $token_next = $rLL->[$K_oc]->[_TOKEN_];
13602     return unless ( $token_next eq ')' );
13603
13604     my $seqno_o = $rLL->[$K_oc]->[_TYPE_SEQUENCE_];
13605     my $K_io    = $self->[_K_opening_container_]->{$seqno_i};
13606     my $K_oo    = $self->[_K_opening_container_]->{$seqno_o};
13607     return unless ( defined($K_io) && defined($K_oo) );
13608
13609     # RULE 1: Do not break before a closing signature paren
13610     # (regardless of complexity).  This is a fix for issue git#22.
13611     # Looking for something like:
13612     #   sub xxx ( ... do {  ... } ) {
13613     #                               ^----- next block_type
13614     my $K_test = $self->K_next_nonblank($K_oc);
13615     if ( defined($K_test) && $rLL->[$K_test]->[_TYPE_] eq '{' ) {
13616         my $seqno_test = $rLL->[$K_test]->[_TYPE_SEQUENCE_];
13617         if ($seqno_test) {
13618             if (   $self->[_ris_asub_block_]->{$seqno_test}
13619                 || $self->[_ris_sub_block_]->{$seqno_test} )
13620             {
13621                 return 1;
13622             }
13623         }
13624     }
13625
13626     # RULE 2: Break if the contents within braces appears to be 'complex'.  We
13627     # base this decision on the number of tokens between braces.
13628
13629     # xxxxx ( ... do {  ... } ) {
13630     #                 ^^^^^^
13631
13632     # Although very simple, it has the advantages of (1) being insensitive to
13633     # changes in lengths of identifier names, (2) easy to understand, implement
13634     # and test.  A test case for this is 't/snippets/long_line.in'.
13635
13636     # Example: $K_ic - $K_oo = 9       [Pass Rule 2]
13637     # if ( do { $2 !~ /&/ } ) { ... }
13638
13639     # Example: $K_ic - $K_oo = 10      [Pass Rule 2]
13640     # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
13641
13642     # Example: $K_ic - $K_oo = 20      [Fail Rule 2]
13643     # test_zero_args( "do-returned list slice", do { ( 10, 11 )[ 2, 3 ]; });
13644
13645     return if ( $K_ic - $K_io > 16 );
13646
13647     # RULE 3: break if the code between the opening '(' and the '{' is 'complex'
13648     # As with the previous rule, we decide based on the token count
13649
13650     # xxxxx ( ... do {  ... } ) {
13651     #        ^^^^^^^^
13652
13653     # Example: $K_ic - $K_oo = 9       [Pass Rule 2]
13654     #          $K_io - $K_oo = 4       [Pass Rule 3]
13655     # if ( do { $2 !~ /&/ } ) { ... }
13656
13657     # Example: $K_ic - $K_oo = 10    [Pass rule 2]
13658     #          $K_io - $K_oo = 9     [Pass rule 3]
13659     # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
13660
13661     return if ( $K_io - $K_oo > 9 );
13662
13663     # RULE 4: Break if we have already broken this batch of output tokens
13664     return if ( $K_oo < $K_to_go_0 );
13665
13666     # RULE 5: Break if input is not on one line
13667     # For example, we will set the flag for the following expression
13668     # written in one line:
13669
13670     # This has: $K_ic - $K_oo = 10    [Pass rule 2]
13671     #           $K_io - $K_oo = 8     [Pass rule 3]
13672     #   $self->debug( 'Error: ' . do { local $/; <$err> } );
13673
13674     # but we break after the brace if it is on multiple lines on input, since
13675     # the user may prefer it on multiple lines:
13676
13677     # [Fail rule 5]
13678     #   $self->debug(
13679     #       'Error: ' . do { local $/; <$err> }
13680     #   );
13681
13682     if ( !$rOpts_ignore_old_breakpoints ) {
13683         my $iline_oo = $rLL->[$K_oo]->[_LINE_INDEX_];
13684         my $iline_oc = $rLL->[$K_oc]->[_LINE_INDEX_];
13685         return if ( $iline_oo != $iline_oc );
13686     }
13687
13688     # OK to keep the paren tight
13689     return 1;
13690 } ## end sub tight_paren_follows
13691
13692 my %is_brace_semicolon_colon;
13693
13694 BEGIN {
13695     my @q = qw( { } ; : );
13696     @is_brace_semicolon_colon{@q} = (1) x scalar(@q);
13697 }
13698
13699 sub starting_one_line_block {
13700
13701     # after seeing an opening curly brace, look for the closing brace and see
13702     # if the entire block will fit on a line.  This routine is not always right
13703     # so a check is made later (at the closing brace) to make sure we really
13704     # have a one-line block.  We have to do this preliminary check, though,
13705     # because otherwise we would always break at a semicolon within a one-line
13706     # block if the block contains multiple statements.
13707
13708     my ( $self, $Kj, $K_last_nonblank, $K_last ) = @_;
13709
13710     my $rbreak_container     = $self->[_rbreak_container_];
13711     my $rshort_nested        = $self->[_rshort_nested_];
13712     my $rLL                  = $self->[_rLL_];
13713     my $K_opening_container  = $self->[_K_opening_container_];
13714     my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
13715
13716     # kill any current block - we can only go 1 deep
13717     destroy_one_line_block();
13718
13719     # return value:
13720     #  1=distance from start of block to opening brace exceeds line length
13721     #  0=otherwise
13722
13723     my $i_start = 0;
13724
13725     # This routine should not have been called if there are no tokens in the
13726     # 'to_go' arrays of previously stored tokens.  A previous call to
13727     # 'store_token_to_go' should have stored an opening brace. An error here
13728     # indicates that a programming change may have caused a flush operation to
13729     # clean out the previously stored tokens.
13730     if ( !defined($max_index_to_go) || $max_index_to_go < 0 ) {
13731         Fault("program bug: store_token_to_go called incorrectly\n")
13732           if (DEVEL_MODE);
13733         return 0;
13734     }
13735
13736     # Return if block should be broken
13737     my $type_sequence_j = $rLL->[$Kj]->[_TYPE_SEQUENCE_];
13738     if ( $rbreak_container->{$type_sequence_j} ) {
13739         return 0;
13740     }
13741
13742     my $ris_bli_container = $self->[_ris_bli_container_];
13743     my $is_bli            = $ris_bli_container->{$type_sequence_j};
13744
13745     my $block_type = $rblock_type_of_seqno->{$type_sequence_j};
13746     $block_type = EMPTY_STRING unless ( defined($block_type) );
13747
13748     my $previous_nonblank_token = EMPTY_STRING;
13749     my $i_last_nonblank         = -1;
13750     if ( defined($K_last_nonblank) ) {
13751         $i_last_nonblank = $K_last_nonblank - $K_to_go[0];
13752         if ( $i_last_nonblank >= 0 ) {
13753             $previous_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
13754         }
13755     }
13756
13757     # find the starting keyword for this block (such as 'if', 'else', ...)
13758     if (
13759         $max_index_to_go == 0
13760         ##|| $block_type =~ /^[\{\}\;\:]$/
13761         || $is_brace_semicolon_colon{$block_type}
13762         || substr( $block_type, 0, 7 ) eq 'package'
13763       )
13764     {
13765         $i_start = $max_index_to_go;
13766     }
13767
13768     # the previous nonblank token should start these block types
13769     elsif (
13770         $i_last_nonblank >= 0
13771         && (   $previous_nonblank_token eq $block_type
13772             || $self->[_ris_asub_block_]->{$type_sequence_j}
13773             || $self->[_ris_sub_block_]->{$type_sequence_j}
13774             || substr( $block_type, -2, 2 ) eq '()' )
13775       )
13776     {
13777         $i_start = $i_last_nonblank;
13778
13779         # For signatures and extended syntax ...
13780         # If this brace follows a parenthesized list, we should look back to
13781         # find the keyword before the opening paren because otherwise we might
13782         # form a one line block which stays intact, and cause the parenthesized
13783         # expression to break open. That looks bad.
13784         if ( $tokens_to_go[$i_start] eq ')' ) {
13785
13786             # Find the opening paren
13787             my $K_start = $K_to_go[$i_start];
13788             return 0 unless defined($K_start);
13789             my $seqno = $type_sequence_to_go[$i_start];
13790             return 0 unless ($seqno);
13791             my $K_opening = $K_opening_container->{$seqno};
13792             return 0 unless defined($K_opening);
13793             my $i_opening = $i_start + ( $K_opening - $K_start );
13794
13795             # give up if not on this line
13796             return 0 unless ( $i_opening >= 0 );
13797             $i_start = $i_opening;    ##$index_max_forced_break + 1;
13798
13799             # go back one token before the opening paren
13800             if ( $i_start > 0 )                                  { $i_start-- }
13801             if ( $types_to_go[$i_start] eq 'b' && $i_start > 0 ) { $i_start--; }
13802             my $lev = $levels_to_go[$i_start];
13803             if ( $lev > $rLL->[$Kj]->[_LEVEL_] ) { return 0 }
13804         }
13805     }
13806
13807     elsif ( $previous_nonblank_token eq ')' ) {
13808
13809         # For something like "if (xxx) {", the keyword "if" will be
13810         # just after the most recent break. This will be 0 unless
13811         # we have just killed a one-line block and are starting another.
13812         # (doif.t)
13813         # Note: cannot use inext_index_to_go[] here because that array
13814         # is still being constructed.
13815         $i_start = $index_max_forced_break + 1;
13816         if ( $types_to_go[$i_start] eq 'b' ) {
13817             $i_start++;
13818         }
13819
13820         # Patch to avoid breaking short blocks defined with extended_syntax:
13821         # Strip off any trailing () which was added in the parser to mark
13822         # the opening keyword.  For example, in the following
13823         #    create( TypeFoo $e) {$bubba}
13824         # the blocktype would be marked as create()
13825         my $stripped_block_type = $block_type;
13826         if ( substr( $block_type, -2, 2 ) eq '()' ) {
13827             $stripped_block_type = substr( $block_type, 0, -2 );
13828         }
13829         unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
13830             return 0;
13831         }
13832     }
13833
13834     # patch for SWITCH/CASE to retain one-line case/when blocks
13835     elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
13836
13837         # Note: cannot use inext_index_to_go[] here because that array
13838         # is still being constructed.
13839         $i_start = $index_max_forced_break + 1;
13840         if ( $types_to_go[$i_start] eq 'b' ) {
13841             $i_start++;
13842         }
13843         unless ( $tokens_to_go[$i_start] eq $block_type ) {
13844             return 0;
13845         }
13846     }
13847
13848     else {
13849         return 1;
13850     }
13851
13852     my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
13853
13854     my $maximum_line_length =
13855       $maximum_line_length_at_level[ $levels_to_go[$i_start] ];
13856
13857     # see if block starting location is too great to even start
13858     if ( $pos > $maximum_line_length ) {
13859         return 1;
13860     }
13861
13862     # See if everything to the closing token will fit on one line
13863     # This is part of an update to fix cases b562 .. b983
13864     my $K_closing = $self->[_K_closing_container_]->{$type_sequence_j};
13865     return 0 unless ( defined($K_closing) );
13866     my $container_length = $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
13867       $rLL->[$Kj]->[_CUMULATIVE_LENGTH_];
13868
13869     my $excess = $pos + 1 + $container_length - $maximum_line_length;
13870
13871     # Add a small tolerance for welded tokens (case b901)
13872     if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence_j) ) {
13873         $excess += 2;
13874     }
13875
13876     if ( $excess > 0 ) {
13877
13878         # line is too long...  there is no chance of forming a one line block
13879         # if the excess is more than 1 char
13880         return 0 if ( $excess > 1 );
13881
13882         # ... and give up if it is not a one-line block on input.
13883         # note: for a one-line block on input, it may be possible to keep
13884         # it as a one-line block (by removing a needless semicolon ).
13885         my $K_start = $K_to_go[$i_start];
13886         my $ldiff =
13887           $rLL->[$K_closing]->[_LINE_INDEX_] - $rLL->[$K_start]->[_LINE_INDEX_];
13888         return 0 if ($ldiff);
13889     }
13890
13891     foreach my $Ki ( $Kj + 1 .. $K_last ) {
13892
13893         # old whitespace could be arbitrarily large, so don't use it
13894         if ( $rLL->[$Ki]->[_TYPE_] eq 'b' ) { $pos += 1 }
13895         else { $pos += $rLL->[$Ki]->[_TOKEN_LENGTH_] }
13896
13897         # ignore some small blocks
13898         my $type_sequence_i = $rLL->[$Ki]->[_TYPE_SEQUENCE_];
13899         my $nobreak         = $rshort_nested->{$type_sequence_i};
13900
13901         # Return false result if we exceed the maximum line length,
13902         if ( $pos > $maximum_line_length ) {
13903             return 0;
13904         }
13905
13906         # keep going for non-containers
13907         elsif ( !$type_sequence_i ) {
13908
13909         }
13910
13911         # return if we encounter another opening brace before finding the
13912         # closing brace.
13913         elsif ($rLL->[$Ki]->[_TOKEN_] eq '{'
13914             && $rLL->[$Ki]->[_TYPE_] eq '{'
13915             && $rblock_type_of_seqno->{$type_sequence_i}
13916             && !$nobreak )
13917         {
13918             return 0;
13919         }
13920
13921         # if we find our closing brace..
13922         elsif ($rLL->[$Ki]->[_TOKEN_] eq '}'
13923             && $rLL->[$Ki]->[_TYPE_] eq '}'
13924             && $rblock_type_of_seqno->{$type_sequence_i}
13925             && !$nobreak )
13926         {
13927
13928             # be sure any trailing comment also fits on the line
13929             my $Ki_nonblank = $Ki;
13930             if ( $Ki_nonblank < $K_last ) {
13931                 $Ki_nonblank++;
13932                 if (   $rLL->[$Ki_nonblank]->[_TYPE_] eq 'b'
13933                     && $Ki_nonblank < $K_last )
13934                 {
13935                     $Ki_nonblank++;
13936                 }
13937             }
13938
13939             # Patch for one-line sort/map/grep/eval blocks with side comments:
13940             # We will ignore the side comment length for sort/map/grep/eval
13941             # because this can lead to statements which change every time
13942             # perltidy is run.  Here is an example from Denis Moskowitz which
13943             # oscillates between these two states without this patch:
13944
13945 ## --------
13946 ## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
13947 ##  @baz;
13948 ##
13949 ## grep {
13950 ##     $_->foo ne 'bar'
13951 ##   }    # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
13952 ##   @baz;
13953 ## --------
13954
13955             # When the first line is input it gets broken apart by the main
13956             # line break logic in sub process_line_of_CODE.
13957             # When the second line is input it gets recombined by
13958             # process_line_of_CODE and passed to the output routines.  The
13959             # output routines (break_long_lines) do not break it apart
13960             # because the bond strengths are set to the highest possible value
13961             # for grep/map/eval/sort blocks, so the first version gets output.
13962             # It would be possible to fix this by changing bond strengths,
13963             # but they are high to prevent errors in older versions of perl.
13964             # See c100 for eval test.
13965             if (   $Ki < $K_last
13966                 && $rLL->[$K_last]->[_TYPE_] eq '#'
13967                 && $rLL->[$K_last]->[_LEVEL_] == $rLL->[$Ki]->[_LEVEL_]
13968                 && !$rOpts_ignore_side_comment_lengths
13969                 && !$is_sort_map_grep_eval{$block_type}
13970                 && $K_last - $Ki_nonblank <= 2 )
13971             {
13972                 # Only include the side comment for if/else/elsif/unless if it
13973                 # immediately follows (because the current '$rbrace_follower'
13974                 # logic for these will give an immediate brake after these
13975                 # closing braces).  So for example a line like this
13976                 #     if (...) { ... } ; # very long comment......
13977                 # will already break like this:
13978                 #     if (...) { ... }
13979                 #     ; # very long comment......
13980                 # so we do not need to include the length of the comment, which
13981                 # would break the block. Project 'bioperl' has coding like this.
13982                 ##    !~ /^(if|else|elsif|unless)$/
13983                 if (  !$is_if_unless_elsif_else{$block_type}
13984                     || $K_last == $Ki_nonblank )
13985                 {
13986                     $Ki_nonblank = $K_last;
13987                     $pos += $rLL->[$Ki_nonblank]->[_TOKEN_LENGTH_];
13988
13989                     if ( $Ki_nonblank > $Ki + 1 ) {
13990
13991                         # source whitespace could be anything, assume
13992                         # at least one space before the hash on output
13993                         if ( $rLL->[ $Ki + 1 ]->[_TYPE_] eq 'b' ) {
13994                             $pos += 1;
13995                         }
13996                         else { $pos += $rLL->[ $Ki + 1 ]->[_TOKEN_LENGTH_] }
13997                     }
13998
13999                     if ( $pos >= $maximum_line_length ) {
14000                         return 0;
14001                     }
14002                 }
14003             }
14004
14005             # ok, it's a one-line block
14006             create_one_line_block( $i_start, 20 );
14007             return 0;
14008         }
14009
14010         # just keep going for other characters
14011         else {
14012         }
14013     }
14014
14015     # We haven't hit the closing brace, but there is still space. So the
14016     # question here is, should we keep going to look at more lines in hopes of
14017     # forming a new one-line block, or should we stop right now. The problem
14018     # with continuing is that we will not be able to honor breaks before the
14019     # opening brace if we continue.
14020
14021     # Typically we will want to keep trying to make one-line blocks for things
14022     # like sort/map/grep/eval.  But it is not always a good idea to make as
14023     # many one-line blocks as possible, so other types are not done.  The user
14024     # can always use -mangle.
14025
14026     # If we want to keep going, we will create a new one-line block.
14027     # The blocks which we can keep going are in a hash, but we never want
14028     # to continue if we are at a '-bli' block.
14029     if ( $want_one_line_block{$block_type} && !$is_bli ) {
14030         create_one_line_block( $i_start, 1 );
14031     }
14032     return 0;
14033 } ## end sub starting_one_line_block
14034
14035 sub unstore_token_to_go {
14036
14037     # remove most recent token from output stream
14038     my $self = shift;
14039     if ( $max_index_to_go > 0 ) {
14040         $max_index_to_go--;
14041     }
14042     else {
14043         $max_index_to_go = UNDEFINED_INDEX;
14044     }
14045     return;
14046 } ## end sub unstore_token_to_go
14047
14048 sub compare_indentation_levels {
14049
14050     # Check to see if output line tabbing agrees with input line
14051     # this can be very useful for debugging a script which has an extra
14052     # or missing brace.
14053
14054     my ( $self, $K_first, $guessed_indentation_level, $line_number ) = @_;
14055     return unless ( defined($K_first) );
14056
14057     my $rLL = $self->[_rLL_];
14058
14059     my $structural_indentation_level = $rLL->[$K_first]->[_LEVEL_];
14060     my $radjusted_levels             = $self->[_radjusted_levels_];
14061     if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
14062         $structural_indentation_level = $radjusted_levels->[$K_first];
14063     }
14064
14065     # record max structural depth for log file
14066     if ( $structural_indentation_level > $self->[_maximum_BLOCK_level_] ) {
14067         $self->[_maximum_BLOCK_level_]         = $structural_indentation_level;
14068         $self->[_maximum_BLOCK_level_at_line_] = $line_number;
14069     }
14070
14071     my $type_sequence = $rLL->[$K_first]->[_TYPE_SEQUENCE_];
14072     my $is_closing_block =
14073          $type_sequence
14074       && $self->[_rblock_type_of_seqno_]->{$type_sequence}
14075       && $rLL->[$K_first]->[_TYPE_] eq '}';
14076
14077     if ( $guessed_indentation_level ne $structural_indentation_level ) {
14078         $self->[_last_tabbing_disagreement_] = $line_number;
14079
14080         if ($is_closing_block) {
14081
14082             if ( !$self->[_in_brace_tabbing_disagreement_] ) {
14083                 $self->[_in_brace_tabbing_disagreement_] = $line_number;
14084             }
14085             if ( !$self->[_first_brace_tabbing_disagreement_] ) {
14086                 $self->[_first_brace_tabbing_disagreement_] = $line_number;
14087             }
14088         }
14089
14090         if ( !$self->[_in_tabbing_disagreement_] ) {
14091             $self->[_tabbing_disagreement_count_]++;
14092
14093             if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
14094                 write_logfile_entry(
14095 "Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
14096                 );
14097             }
14098             $self->[_in_tabbing_disagreement_]    = $line_number;
14099             $self->[_first_tabbing_disagreement_] = $line_number
14100               unless ( $self->[_first_tabbing_disagreement_] );
14101         }
14102     }
14103     else {
14104
14105         $self->[_in_brace_tabbing_disagreement_] = 0 if ($is_closing_block);
14106
14107         my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
14108         if ($in_tabbing_disagreement) {
14109
14110             if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
14111                 write_logfile_entry(
14112 "End indentation disagreement from input line $in_tabbing_disagreement\n"
14113                 );
14114
14115                 if ( $self->[_tabbing_disagreement_count_] == MAX_NAG_MESSAGES )
14116                 {
14117                     write_logfile_entry(
14118                         "No further tabbing disagreements will be noted\n");
14119                 }
14120             }
14121             $self->[_in_tabbing_disagreement_] = 0;
14122
14123         }
14124     }
14125     return;
14126 } ## end sub compare_indentation_levels
14127
14128 ###################################################
14129 # CODE SECTION 8: Utilities for setting breakpoints
14130 ###################################################
14131
14132 {    ## begin closure set_forced_breakpoint
14133
14134     my @forced_breakpoint_undo_stack;
14135
14136     # These are global vars for efficiency:
14137     # my $forced_breakpoint_count;
14138     # my $forced_breakpoint_undo_count;
14139     # my $index_max_forced_break;
14140
14141     # Break before or after certain tokens based on user settings
14142     my %break_before_or_after_token;
14143
14144     BEGIN {
14145
14146         # Updated to use all operators. This fixes case b1054
14147         # Here is the previous simplified version:
14148         ## my @q = qw( . : ? and or xor && || );
14149         my @q = @all_operators;
14150
14151         push @q, ',';
14152         @break_before_or_after_token{@q} = (1) x scalar(@q);
14153     }
14154
14155     # This is no longer called - global vars - moved into initialize_batch_vars
14156     sub initialize_forced_breakpoint_vars {
14157         $forced_breakpoint_count      = 0;
14158         $index_max_forced_break       = UNDEFINED_INDEX;
14159         $forced_breakpoint_undo_count = 0;
14160         ##@forced_breakpoint_undo_stack = (); # not needed
14161         return;
14162     }
14163
14164     sub set_fake_breakpoint {
14165
14166         # Just bump up the breakpoint count as a signal that there are breaks.
14167         # This is useful if we have breaks but may want to postpone deciding
14168         # where to make them.
14169         $forced_breakpoint_count++;
14170         return;
14171     }
14172
14173     use constant DEBUG_FORCE => 0;
14174
14175     sub set_forced_breakpoint {
14176         my ( $self, $i ) = @_;
14177
14178         # Set a breakpoint AFTER the token at index $i in the _to_go arrays.
14179
14180         # Exceptions:
14181         # - If the token at index $i is a blank, backup to $i-1 to
14182         #   get to the previous nonblank token.
14183         # - For certain tokens, the break may be placed BEFORE the token
14184         #   at index $i, depending on user break preference settings.
14185         # - If a break is made after an opening token, then a break will
14186         #   also be made before the corresponding closing token.
14187
14188         # Returns '$i_nonblank':
14189         #   = index of the token after which the breakpoint was actually placed
14190         #   = undef if breakpoint was not set.
14191         my $i_nonblank;
14192
14193         if ( !defined($i) || $i < 0 ) {
14194
14195             # Calls with bad index $i are harmless but waste time and should
14196             # be caught and eliminated during code development.
14197             if (DEVEL_MODE) {
14198                 my ( $a, $b, $c ) = caller();
14199                 Fault(
14200 "Bad call to forced breakpoint from $a $b $c ; called with i=$i; please fix\n"
14201                 );
14202             }
14203             return;
14204         }
14205
14206         # Break after token $i
14207         $i_nonblank = $self->set_forced_breakpoint_AFTER($i);
14208
14209         # If we break at an opening container..break at the closing
14210         my $set_closing;
14211         if ( defined($i_nonblank)
14212             && $is_opening_sequence_token{ $tokens_to_go[$i_nonblank] } )
14213         {
14214             $set_closing = 1;
14215             $self->set_closing_breakpoint($i_nonblank);
14216         }
14217
14218         DEBUG_FORCE && do {
14219             my ( $a, $b, $c ) = caller();
14220             my $msg =
14221 "FORCE $forced_breakpoint_count after call from $a $c with i=$i max=$max_index_to_go";
14222             if ( !defined($i_nonblank) ) {
14223                 $i = EMPTY_STRING unless defined($i);
14224                 $msg .= " but could not set break after i='$i'\n";
14225             }
14226             else {
14227                 $msg .= <<EOM;
14228 set break after $i_nonblank: tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]
14229 EOM
14230                 if ( defined($set_closing) ) {
14231                     $msg .=
14232 " Also set closing breakpoint corresponding to this token\n";
14233                 }
14234             }
14235             print STDOUT $msg;
14236         };
14237
14238         return $i_nonblank;
14239     } ## end sub set_forced_breakpoint
14240
14241     sub set_forced_breakpoint_AFTER {
14242         my ( $self, $i ) = @_;
14243
14244         # This routine is only called by sub set_forced_breakpoint and
14245         # sub set_closing_breakpoint.
14246
14247         # Set a breakpoint AFTER the token at index $i in the _to_go arrays.
14248
14249         # Exceptions:
14250         # - If the token at index $i is a blank, backup to $i-1 to
14251         #   get to the previous nonblank token.
14252         # - For certain tokens, the break may be placed BEFORE the token
14253         #   at index $i, depending on user break preference settings.
14254
14255         # Returns:
14256         #   - the index of the token after which the break was set, or
14257         #   - undef if no break was set
14258
14259         return unless ( defined($i) && $i >= 0 );
14260
14261         # Back up at a blank so we have a token to examine.
14262         # This was added to fix for cases like b932 involving an '=' break.
14263         if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
14264
14265         # Never break between welded tokens
14266         return
14267           if ( $total_weld_count
14268             && $self->[_rK_weld_right_]->{ $K_to_go[$i] } );
14269
14270         my $token = $tokens_to_go[$i];
14271         my $type  = $types_to_go[$i];
14272
14273         # For certain tokens, use user settings to decide if we break before or
14274         # after it
14275         if ( $break_before_or_after_token{$token}
14276             && ( $type eq $token || $type eq 'k' ) )
14277         {
14278             if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
14279         }
14280
14281         # breaks are forced before 'if' and 'unless'
14282         elsif ( $is_if_unless{$token} && $type eq 'k' ) { $i-- }
14283
14284         if ( $i >= 0 && $i <= $max_index_to_go ) {
14285             my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
14286
14287             if (   $i_nonblank >= 0
14288                 && $nobreak_to_go[$i_nonblank] == 0
14289                 && !$forced_breakpoint_to_go[$i_nonblank] )
14290             {
14291                 $forced_breakpoint_to_go[$i_nonblank] = 1;
14292
14293                 if ( $i_nonblank > $index_max_forced_break ) {
14294                     $index_max_forced_break = $i_nonblank;
14295                 }
14296                 $forced_breakpoint_count++;
14297                 $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ]
14298                   = $i_nonblank;
14299
14300                 # success
14301                 return $i_nonblank;
14302             }
14303         }
14304         return;
14305     } ## end sub set_forced_breakpoint_AFTER
14306
14307     sub clear_breakpoint_undo_stack {
14308         my ($self) = @_;
14309         $forced_breakpoint_undo_count = 0;
14310         return;
14311     }
14312
14313     use constant DEBUG_UNDOBP => 0;
14314
14315     sub undo_forced_breakpoint_stack {
14316
14317         my ( $self, $i_start ) = @_;
14318
14319         # Given $i_start, a non-negative index the 'undo stack' of breakpoints,
14320         # remove all breakpoints from the top of the 'undo stack' down to and
14321         # including index $i_start.
14322
14323         # The 'undo stack' is a stack of all breakpoints made for a batch of
14324         # code.
14325
14326         if ( $i_start < 0 ) {
14327             $i_start = 0;
14328             my ( $a, $b, $c ) = caller();
14329
14330             # Bad call, can only be due to a recent programming change.
14331             Fault(
14332 "Program Bug: undo_forced_breakpoint_stack from $a $c has bad i=$i_start "
14333             ) if (DEVEL_MODE);
14334             return;
14335         }
14336
14337         while ( $forced_breakpoint_undo_count > $i_start ) {
14338             my $i =
14339               $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
14340             if ( $i >= 0 && $i <= $max_index_to_go ) {
14341                 $forced_breakpoint_to_go[$i] = 0;
14342                 $forced_breakpoint_count--;
14343
14344                 DEBUG_UNDOBP && do {
14345                     my ( $a, $b, $c ) = caller();
14346                     print STDOUT
14347 "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
14348                 };
14349             }
14350
14351             # shouldn't happen, but not a critical error
14352             else {
14353                 DEBUG_UNDOBP && do {
14354                     my ( $a, $b, $c ) = caller();
14355                     print STDOUT
14356 "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
14357                 };
14358             }
14359         }
14360         return;
14361     } ## end sub undo_forced_breakpoint_stack
14362 } ## end closure set_forced_breakpoint
14363
14364 {    ## begin closure set_closing_breakpoint
14365
14366     my %postponed_breakpoint;
14367
14368     sub initialize_postponed_breakpoint {
14369         %postponed_breakpoint = ();
14370         return;
14371     }
14372
14373     sub has_postponed_breakpoint {
14374         my ($seqno) = @_;
14375         return $postponed_breakpoint{$seqno};
14376     }
14377
14378     sub set_closing_breakpoint {
14379
14380         # set a breakpoint at a matching closing token
14381         my ( $self, $i_break ) = @_;
14382
14383         if ( $mate_index_to_go[$i_break] >= 0 ) {
14384
14385             # Don't reduce the '2' in the statement below.
14386             # Test files: attrib.t, BasicLyx.pm.html
14387             if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
14388
14389              # break before } ] and ), but sub set_forced_breakpoint will decide
14390              # to break before or after a ? and :
14391                 my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
14392                 $self->set_forced_breakpoint_AFTER(
14393                     $mate_index_to_go[$i_break] - $inc );
14394             }
14395         }
14396         else {
14397             my $type_sequence = $type_sequence_to_go[$i_break];
14398             if ($type_sequence) {
14399                 my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
14400                 $postponed_breakpoint{$type_sequence} = 1;
14401             }
14402         }
14403         return;
14404     } ## end sub set_closing_breakpoint
14405 } ## end closure set_closing_breakpoint
14406
14407 #########################################
14408 # CODE SECTION 9: Process batches of code
14409 #########################################
14410
14411 {    ## begin closure grind_batch_of_CODE
14412
14413     # The routines in this closure begin the processing of a 'batch' of code.
14414
14415     # A variable to keep track of consecutive nonblank lines so that we can
14416     # insert occasional blanks
14417     my @nonblank_lines_at_depth;
14418
14419     # A variable to remember maximum size of previous batches; this is needed
14420     # by the logical padding routine
14421     my $peak_batch_size;
14422     my $batch_count;
14423
14424     # variables to keep track of unbalanced containers.
14425     my %saved_opening_indentation;
14426     my @unmatched_opening_indexes_in_this_batch;
14427
14428     sub initialize_grind_batch_of_CODE {
14429         @nonblank_lines_at_depth   = ();
14430         $peak_batch_size           = 0;
14431         $batch_count               = 0;
14432         %saved_opening_indentation = ();
14433         return;
14434     }
14435
14436     # sub grind_batch_of_CODE receives sections of code which are the longest
14437     # possible lines without a break.  In other words, it receives what is left
14438     # after applying all breaks forced by blank lines, block comments, side
14439     # comments, pod text, and structural braces.  Its job is to break this code
14440     # down into smaller pieces, if necessary, which fit within the maximum
14441     # allowed line length.  Then it sends the resulting lines of code on down
14442     # the pipeline to the VerticalAligner package, breaking the code into
14443     # continuation lines as necessary.  The batch of tokens are in the "to_go"
14444     # arrays.  The name 'grind' is slightly suggestive of a machine continually
14445     # breaking down long lines of code, but mainly it is unique and easy to
14446     # remember and find with an editor search.
14447
14448     # The two routines 'process_line_of_CODE' and 'grind_batch_of_CODE' work
14449     # together in the following way:
14450
14451     # - 'process_line_of_CODE' receives the original INPUT lines one-by-one and
14452     # combines them into the largest sequences of tokens which might form a new
14453     # line.
14454     # - 'grind_batch_of_CODE' determines which tokens will form the OUTPUT
14455     # lines.
14456
14457     # So sub 'process_line_of_CODE' builds up the longest possible continuous
14458     # sequences of tokens, regardless of line length, and then
14459     # grind_batch_of_CODE breaks these sequences back down into the new output
14460     # lines.
14461
14462     # Sub 'grind_batch_of_CODE' ships its output lines to the vertical aligner.
14463
14464     use constant DEBUG_GRIND => 0;
14465
14466     sub check_grind_input {
14467
14468         # Check for valid input to sub grind_batch_of_CODE.  An error here
14469         # would most likely be due to an error in 'sub store_token_to_go'.
14470         my ($self) = @_;
14471
14472         # Be sure there are tokens in the batch
14473         if ( $max_index_to_go < 0 ) {
14474             Fault(<<EOM);
14475 sub grind incorrectly called with max_index_to_go=$max_index_to_go
14476 EOM
14477         }
14478         my $Klimit = $self->[_Klimit_];
14479
14480         # The local batch tokens must be a continuous part of the global token
14481         # array.
14482         my $KK;
14483         foreach my $ii ( 0 .. $max_index_to_go ) {
14484
14485             my $Km = $KK;
14486
14487             $KK = $K_to_go[$ii];
14488             if ( !defined($KK) || $KK < 0 || $KK > $Klimit ) {
14489                 $KK = '(undef)' unless defined($KK);
14490                 Fault(<<EOM);
14491 at batch index at i=$ii, the value of K_to_go[$ii] = '$KK' is out of the valid range (0 - $Klimit)
14492 EOM
14493             }
14494
14495             if ( $ii > 0 && $KK != $Km + 1 ) {
14496                 my $im = $ii - 1;
14497                 Fault(<<EOM);
14498 Non-sequential K indexes: i=$im has Km=$Km; but i=$ii has K=$KK;  expecting K = Km+1
14499 EOM
14500             }
14501         }
14502         return;
14503     } ## end sub check_grind_input
14504
14505     sub grind_batch_of_CODE {
14506
14507         my ($self) = @_;
14508
14509         my $this_batch = $self->[_this_batch_];
14510         $batch_count++;
14511
14512         $self->check_grind_input() if (DEVEL_MODE);
14513
14514         # This routine is only called from sub flush_batch_of_code, so that
14515         # routine is a better spot for debugging.
14516         DEBUG_GRIND && do {
14517             my $token = my $type = EMPTY_STRING;
14518             if ( $max_index_to_go >= 0 ) {
14519                 $token = $tokens_to_go[$max_index_to_go];
14520                 $type  = $types_to_go[$max_index_to_go];
14521             }
14522             my $output_str = EMPTY_STRING;
14523             if ( $max_index_to_go > 20 ) {
14524                 my $mm = $max_index_to_go - 10;
14525                 $output_str =
14526                   join( EMPTY_STRING, @tokens_to_go[ 0 .. 10 ] ) . " ... "
14527                   . join( EMPTY_STRING,
14528                     @tokens_to_go[ $mm .. $max_index_to_go ] );
14529             }
14530             else {
14531                 $output_str = join EMPTY_STRING,
14532                   @tokens_to_go[ 0 .. $max_index_to_go ];
14533             }
14534             print STDERR <<EOM;
14535 grind got batch number $batch_count with $max_index_to_go tokens, last type '$type' tok='$token', text:
14536 $output_str
14537 EOM
14538         };
14539
14540         return if ( $max_index_to_go < 0 );
14541
14542         $self->set_lp_indentation()
14543           if ($rOpts_line_up_parentheses);
14544
14545         #----------------------------
14546         # Shortcut for block comments
14547         #----------------------------
14548         if (
14549                $max_index_to_go == 0
14550             && $types_to_go[0] eq '#'
14551
14552             # this shortcut does not work for -lp yet
14553             && !$rOpts_line_up_parentheses
14554           )
14555         {
14556             my $ibeg = 0;
14557             $this_batch->[_ri_first_]                 = [$ibeg];
14558             $this_batch->[_ri_last_]                  = [$ibeg];
14559             $this_batch->[_peak_batch_size_]          = $peak_batch_size;
14560             $this_batch->[_do_not_pad_]               = 0;
14561             $this_batch->[_batch_count_]              = $batch_count;
14562             $this_batch->[_rix_seqno_controlling_ci_] = [];
14563
14564             $self->convey_batch_to_vertical_aligner();
14565
14566             my $level = $levels_to_go[$ibeg];
14567             $self->[_last_last_line_leading_level_] =
14568               $self->[_last_line_leading_level_];
14569             $self->[_last_line_leading_type_]  = $types_to_go[$ibeg];
14570             $self->[_last_line_leading_level_] = $level;
14571             $nonblank_lines_at_depth[$level]   = 1;
14572             return;
14573         }
14574
14575         #-------------
14576         # Normal route
14577         #-------------
14578
14579         my $rLL                      = $self->[_rLL_];
14580         my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
14581         my $rwant_container_open     = $self->[_rwant_container_open_];
14582
14583         #-------------------------------------------------------
14584         # Loop over the batch to initialize some batch variables
14585         #-------------------------------------------------------
14586         my $comma_count_in_batch = 0;
14587         my $ilast_nonblank       = -1;
14588         my @colon_list;
14589         my @ix_seqno_controlling_ci;
14590         my %comma_arrow_count;
14591         my $comma_arrow_count_contained = 0;
14592         my @unmatched_closing_indexes_in_this_batch;
14593
14594         @unmatched_opening_indexes_in_this_batch = ();
14595
14596         foreach my $i ( 0 .. $max_index_to_go ) {
14597             $iprev_to_go[$i] = $ilast_nonblank;
14598             $inext_to_go[$i] = $i + 1;
14599
14600             my $type = $types_to_go[$i];
14601             if ( $type ne 'b' ) {
14602                 if ( $ilast_nonblank >= 0 ) {
14603                     $inext_to_go[$ilast_nonblank] = $i;
14604
14605                     # just in case there are two blanks in a row (shouldn't
14606                     # happen)
14607                     if ( ++$ilast_nonblank < $i ) {
14608                         $inext_to_go[$ilast_nonblank] = $i;
14609                     }
14610                 }
14611                 $ilast_nonblank = $i;
14612
14613                 # This is a good spot to efficiently collect information needed
14614                 # for breaking lines...
14615
14616                 # gather info needed by sub break_long_lines
14617                 if ( $type_sequence_to_go[$i] ) {
14618                     my $seqno = $type_sequence_to_go[$i];
14619                     my $token = $tokens_to_go[$i];
14620
14621                     # remember indexes of any tokens controlling xci
14622                     # in this batch. This list is needed by sub undo_ci.
14623                     if ( $ris_seqno_controlling_ci->{$seqno} ) {
14624                         push @ix_seqno_controlling_ci, $i;
14625                     }
14626
14627                     if ( $is_opening_sequence_token{$token} ) {
14628                         if ( $rwant_container_open->{$seqno} ) {
14629                             $self->set_forced_breakpoint($i);
14630                         }
14631                         push @unmatched_opening_indexes_in_this_batch, $i;
14632                         if ( $type eq '?' ) {
14633                             push @colon_list, $type;
14634                         }
14635                     }
14636                     elsif ( $is_closing_sequence_token{$token} ) {
14637
14638                         if ( $i > 0 && $rwant_container_open->{$seqno} ) {
14639                             $self->set_forced_breakpoint( $i - 1 );
14640                         }
14641
14642                         my $i_mate =
14643                           pop @unmatched_opening_indexes_in_this_batch;
14644                         if ( defined($i_mate) && $i_mate >= 0 ) {
14645                             if ( $type_sequence_to_go[$i_mate] ==
14646                                 $type_sequence_to_go[$i] )
14647                             {
14648                                 $mate_index_to_go[$i]      = $i_mate;
14649                                 $mate_index_to_go[$i_mate] = $i;
14650                                 if ( $comma_arrow_count{$seqno} ) {
14651                                     $comma_arrow_count_contained +=
14652                                       $comma_arrow_count{$seqno};
14653                                 }
14654                             }
14655                             else {
14656                                 push @unmatched_opening_indexes_in_this_batch,
14657                                   $i_mate;
14658                                 push @unmatched_closing_indexes_in_this_batch,
14659                                   $i;
14660                             }
14661                         }
14662                         else {
14663                             push @unmatched_closing_indexes_in_this_batch, $i;
14664                         }
14665                         if ( $type eq ':' ) {
14666                             push @colon_list, $type;
14667                         }
14668                     } ## end elsif ( $is_closing_sequence_token...)
14669
14670                 } ## end if ($seqno)
14671
14672                 elsif ( $type eq ',' ) { $comma_count_in_batch++; }
14673                 elsif ( $tokens_to_go[$i] eq '=>' ) {
14674                     if (@unmatched_opening_indexes_in_this_batch) {
14675                         my $j = $unmatched_opening_indexes_in_this_batch[-1];
14676                         my $seqno = $type_sequence_to_go[$j];
14677                         $comma_arrow_count{$seqno}++;
14678                     }
14679                 }
14680             } ## end if ( $type ne 'b' )
14681         } ## end for ( my $i = 0 ; $i <=...)
14682
14683         my $is_unbalanced_batch = @unmatched_opening_indexes_in_this_batch +
14684           @unmatched_closing_indexes_in_this_batch;
14685
14686         #------------------------
14687         # Set special breakpoints
14688         #------------------------
14689         # If this line ends in a code block brace, set breaks at any
14690         # previous closing code block braces to breakup a chain of code
14691         # blocks on one line.  This is very rare but can happen for
14692         # user-defined subs.  For example we might be looking at this:
14693         #  BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
14694         my $saw_good_break = 0;    # flag to force breaks even if short line
14695         if (
14696
14697             # looking for opening or closing block brace
14698             $block_type_to_go[$max_index_to_go]
14699
14700             # never any good breaks if just one token
14701             && $max_index_to_go > 0
14702
14703             # but not one of these which are never duplicated on a line:
14704             # until|while|for|if|elsif|else
14705             && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go]
14706             }
14707           )
14708         {
14709             my $lev = $nesting_depth_to_go[$max_index_to_go];
14710
14711             # Walk backwards from the end and
14712             # set break at any closing block braces at the same level.
14713             # But quit if we are not in a chain of blocks.
14714             foreach my $i ( reverse( 0 .. $max_index_to_go - 1 ) ) {
14715                 last if ( $levels_to_go[$i] < $lev );   # stop at a lower level
14716                 next if ( $levels_to_go[$i] > $lev );   # skip past higher level
14717
14718                 if ( $block_type_to_go[$i] ) {
14719                     if ( $tokens_to_go[$i] eq '}' ) {
14720                         $self->set_forced_breakpoint($i);
14721                         $saw_good_break = 1;
14722                     }
14723                 }
14724
14725                 # quit if we see anything besides words, function, blanks
14726                 # at this level
14727                 elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
14728             }
14729         }
14730
14731         #-----------------------------------------------
14732         # insertion of any blank lines before this batch
14733         #-----------------------------------------------
14734
14735         my $imin = 0;
14736         my $imax = $max_index_to_go;
14737
14738         # trim any blank tokens
14739         if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
14740         if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
14741
14742         if ( $imin > $imax ) {
14743             if (DEVEL_MODE) {
14744                 my $K0  = $K_to_go[0];
14745                 my $lno = EMPTY_STRING;
14746                 if ( defined($K0) ) { $lno = $rLL->[$K0]->[_LINE_INDEX_] + 1 }
14747                 Fault(<<EOM);
14748 Strange: received batch containing only blanks near input line $lno: after trimming imin=$imin, imax=$imax
14749 EOM
14750             }
14751             return;
14752         }
14753
14754         my $last_line_leading_type  = $self->[_last_line_leading_type_];
14755         my $last_line_leading_level = $self->[_last_line_leading_level_];
14756         my $last_last_line_leading_level =
14757           $self->[_last_last_line_leading_level_];
14758
14759         # add a blank line before certain key types but not after a comment
14760         if ( $last_line_leading_type ne '#' ) {
14761             my $want_blank    = 0;
14762             my $leading_token = $tokens_to_go[$imin];
14763             my $leading_type  = $types_to_go[$imin];
14764
14765             # break before certain key blocks except one-liners
14766             if ( $leading_type eq 'k' ) {
14767                 if ( $leading_token eq 'BEGIN' || $leading_token eq 'END' ) {
14768                     $want_blank = $rOpts->{'blank-lines-before-subs'}
14769                       if ( terminal_type_i( $imin, $imax ) ne '}' );
14770                 }
14771
14772                 # Break before certain block types if we haven't had a
14773                 # break at this level for a while.  This is the
14774                 # difficult decision..
14775                 elsif ($last_line_leading_type ne 'b'
14776                     && $is_if_unless_while_until_for_foreach{$leading_token} )
14777                 {
14778                     my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
14779                     if ( !defined($lc) ) { $lc = 0 }
14780
14781                     # patch for RT #128216: no blank line inserted at a level
14782                     # change
14783                     if ( $levels_to_go[$imin] != $last_line_leading_level ) {
14784                         $lc = 0;
14785                     }
14786
14787                     $want_blank =
14788                          $rOpts->{'blanks-before-blocks'}
14789                       && $lc >= $rOpts->{'long-block-line-count'}
14790                       && $self->consecutive_nonblank_lines() >=
14791                       $rOpts->{'long-block-line-count'}
14792                       && terminal_type_i( $imin, $imax ) ne '}';
14793                 }
14794             }
14795
14796             # blank lines before subs except declarations and one-liners
14797             elsif ( $leading_type eq 'i' ) {
14798                 if (
14799
14800                     # quick check
14801                     (
14802                         substr( $leading_token, 0, 3 ) eq 'sub'
14803                         || $rOpts_sub_alias_list
14804                     )
14805
14806                     # slow check
14807                     && $leading_token =~ /$SUB_PATTERN/
14808                   )
14809                 {
14810                     $want_blank = $rOpts->{'blank-lines-before-subs'}
14811                       if ( terminal_type_i( $imin, $imax ) !~ /^[\;\}\,]$/ );
14812                 }
14813
14814                 # break before all package declarations
14815                 elsif ( substr( $leading_token, 0, 8 ) eq 'package ' ) {
14816                     $want_blank = $rOpts->{'blank-lines-before-packages'};
14817                 }
14818             }
14819
14820             # Check for blank lines wanted before a closing brace
14821             elsif ( $leading_token eq '}' ) {
14822                 if (   $rOpts->{'blank-lines-before-closing-block'}
14823                     && $block_type_to_go[$imin]
14824                     && $block_type_to_go[$imin] =~
14825                     /$blank_lines_before_closing_block_pattern/ )
14826                 {
14827                     my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
14828                     if ( $nblanks > $want_blank ) {
14829                         $want_blank = $nblanks;
14830                     }
14831                 }
14832             }
14833
14834             if ($want_blank) {
14835
14836                 # future: send blank line down normal path to VerticalAligner
14837                 $self->flush_vertical_aligner();
14838                 my $file_writer_object = $self->[_file_writer_object_];
14839                 $file_writer_object->require_blank_code_lines($want_blank);
14840             }
14841         }
14842
14843         # update blank line variables and count number of consecutive
14844         # non-blank, non-comment lines at this level
14845         $last_last_line_leading_level = $last_line_leading_level;
14846         $last_line_leading_level      = $levels_to_go[$imin];
14847         if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
14848         $last_line_leading_type = $types_to_go[$imin];
14849         if (   $last_line_leading_level == $last_last_line_leading_level
14850             && $last_line_leading_type ne 'b'
14851             && $last_line_leading_type ne '#'
14852             && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
14853         {
14854             $nonblank_lines_at_depth[$last_line_leading_level]++;
14855         }
14856         else {
14857             $nonblank_lines_at_depth[$last_line_leading_level] = 1;
14858         }
14859
14860         $self->[_last_line_leading_type_]       = $last_line_leading_type;
14861         $self->[_last_line_leading_level_]      = $last_line_leading_level;
14862         $self->[_last_last_line_leading_level_] = $last_last_line_leading_level;
14863
14864         #--------------------------
14865         # scan lists and long lines
14866         #--------------------------
14867
14868         # Flag to remember if we called sub 'pad_array_to_go'.
14869         # Some routines (break_lists(), break_long_lines() ) need some
14870         # extra tokens added at the end of the batch.  Most batches do not
14871         # use these routines, so we will avoid calling 'pad_array_to_go'
14872         # unless it is needed.
14873         my $called_pad_array_to_go;
14874
14875         # set all forced breakpoints for good list formatting
14876         my $is_long_line = $max_index_to_go > 0
14877           && $self->excess_line_length( $imin, $max_index_to_go ) > 0;
14878
14879         my $old_line_count_in_batch = 1;
14880         if ( $max_index_to_go > 0 ) {
14881             my $Kbeg = $K_to_go[0];
14882             my $Kend = $K_to_go[$max_index_to_go];
14883             $old_line_count_in_batch +=
14884               $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_];
14885         }
14886
14887         my $rbond_strength_bias = [];
14888         if (
14889                $is_long_line
14890             || $old_line_count_in_batch > 1
14891
14892             # must always call break_lists() with unbalanced batches because
14893             # it is maintaining some stacks
14894             || $is_unbalanced_batch
14895
14896             # call break_lists if we might want to break at commas
14897             || (
14898                 $comma_count_in_batch
14899                 && (   $rOpts_maximum_fields_per_table > 0
14900                     && $rOpts_maximum_fields_per_table <= $comma_count_in_batch
14901                     || $rOpts_comma_arrow_breakpoints == 0 )
14902             )
14903
14904             # call break_lists if user may want to break open some one-line
14905             # hash references
14906             || (   $comma_arrow_count_contained
14907                 && $rOpts_comma_arrow_breakpoints != 3 )
14908           )
14909         {
14910             # add a couple of extra terminal blank tokens
14911             $self->pad_array_to_go();
14912             $called_pad_array_to_go = 1;
14913
14914             my $sgb = $self->break_lists( $is_long_line, $rbond_strength_bias );
14915             $saw_good_break ||= $sgb;
14916         }
14917
14918         # let $ri_first and $ri_last be references to lists of
14919         # first and last tokens of line fragments to output..
14920         my ( $ri_first, $ri_last );
14921
14922         #-------------------------
14923         # write a single line if..
14924         #-------------------------
14925         if (
14926
14927             # we aren't allowed to add any newlines
14928             !$rOpts_add_newlines
14929
14930             # or,
14931             || (
14932
14933                 # this line is 'short'
14934                 !$is_long_line
14935
14936                 # and we didn't see a good breakpoint
14937                 && !$saw_good_break
14938
14939                 # and we don't already have an interior breakpoint
14940                 && !$forced_breakpoint_count
14941             )
14942           )
14943         {
14944             @{$ri_first} = ($imin);
14945             @{$ri_last}  = ($imax);
14946         }
14947
14948         #-----------------------------
14949         # otherwise use multiple lines
14950         #-----------------------------
14951         else {
14952
14953             # add a couple of extra terminal blank tokens if we haven't
14954             # already done so
14955             $self->pad_array_to_go() unless ($called_pad_array_to_go);
14956
14957             ( $ri_first, $ri_last, my $rbond_strength_to_go ) =
14958               $self->break_long_lines( $saw_good_break, \@colon_list,
14959                 $rbond_strength_bias );
14960
14961             $self->break_all_chain_tokens( $ri_first, $ri_last );
14962
14963             $self->break_equals( $ri_first, $ri_last );
14964
14965             # now we do a correction step to clean this up a bit
14966             # (The only time we would not do this is for debugging)
14967             $self->recombine_breakpoints( $ri_first, $ri_last,
14968                 $rbond_strength_to_go )
14969               if ( $rOpts_recombine && @{$ri_first} > 1 );
14970
14971             $self->insert_final_ternary_breaks( $ri_first, $ri_last )
14972               if (@colon_list);
14973         }
14974
14975         $self->insert_breaks_before_list_opening_containers( $ri_first,
14976             $ri_last )
14977           if ( %break_before_container_types && $max_index_to_go > 0 );
14978
14979         #-------------------
14980         # -lp corrector step
14981         #-------------------
14982         my $do_not_pad = 0;
14983         if ($rOpts_line_up_parentheses) {
14984             $do_not_pad = $self->correct_lp_indentation( $ri_first, $ri_last );
14985         }
14986
14987         #--------------------------
14988         # unmask phantom semicolons
14989         #--------------------------
14990         if ( !$tokens_to_go[$imax] && $types_to_go[$imax] eq ';' ) {
14991             my $i       = $imax;
14992             my $tok     = ';';
14993             my $tok_len = 1;
14994             if ( $want_left_space{';'} != WS_NO ) {
14995                 $tok     = ' ;';
14996                 $tok_len = 2;
14997             }
14998             $tokens_to_go[$i]        = $tok;
14999             $token_lengths_to_go[$i] = $tok_len;
15000             my $KK = $K_to_go[$i];
15001             $rLL->[$KK]->[_TOKEN_]        = $tok;
15002             $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
15003             my $line_number = 1 + $rLL->[$KK]->[_LINE_INDEX_];
15004             $self->note_added_semicolon($line_number);
15005
15006             foreach ( $imax .. $max_index_to_go ) {
15007                 $summed_lengths_to_go[ $_ + 1 ] += $tok_len;
15008             }
15009         }
15010
15011         if ( $rOpts_one_line_block_semicolons == 0 ) {
15012             $self->delete_one_line_semicolons( $ri_first, $ri_last );
15013         }
15014
15015         #--------------------
15016         # ship this batch out
15017         #--------------------
15018         $this_batch->[_ri_first_]                 = $ri_first;
15019         $this_batch->[_ri_last_]                  = $ri_last;
15020         $this_batch->[_peak_batch_size_]          = $peak_batch_size;
15021         $this_batch->[_do_not_pad_]               = $do_not_pad;
15022         $this_batch->[_batch_count_]              = $batch_count;
15023         $this_batch->[_rix_seqno_controlling_ci_] = \@ix_seqno_controlling_ci;
15024
15025         $self->convey_batch_to_vertical_aligner();
15026
15027         #-------------------------------------------------------------------
15028         # Write requested number of blank lines after an opening block brace
15029         #-------------------------------------------------------------------
15030         if ($rOpts_blank_lines_after_opening_block) {
15031             my $iterm = $imax;
15032             if ( $types_to_go[$iterm] eq '#' && $iterm > $imin ) {
15033                 $iterm -= 1;
15034                 if ( $types_to_go[$iterm] eq 'b' && $iterm > $imin ) {
15035                     $iterm -= 1;
15036                 }
15037             }
15038
15039             if (   $types_to_go[$iterm] eq '{'
15040                 && $block_type_to_go[$iterm]
15041                 && $block_type_to_go[$iterm] =~
15042                 /$blank_lines_after_opening_block_pattern/ )
15043             {
15044                 my $nblanks = $rOpts_blank_lines_after_opening_block;
15045                 $self->flush_vertical_aligner();
15046                 my $file_writer_object = $self->[_file_writer_object_];
15047                 $file_writer_object->require_blank_code_lines($nblanks);
15048             }
15049         }
15050
15051         # Remember the largest batch size processed. This is needed by the
15052         # logical padding routine to avoid padding the first nonblank token
15053         if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) {
15054             $peak_batch_size = $max_index_to_go;
15055         }
15056
15057         return;
15058     } ## end sub grind_batch_of_CODE
15059
15060     sub save_opening_indentation {
15061
15062         # This should be called after each batch of tokens is output. It
15063         # saves indentations of lines of all unmatched opening tokens.
15064         # These will be used by sub get_opening_indentation.
15065
15066         my ( $self, $ri_first, $ri_last, $rindentation_list ) = @_;
15067
15068         # QW INDENTATION PATCH 1:
15069         # Also save indentation for multiline qw quotes
15070         my @i_qw;
15071         my $seqno_qw_opening;
15072         if ( $types_to_go[$max_index_to_go] eq 'q' ) {
15073             my $KK = $K_to_go[$max_index_to_go];
15074             $seqno_qw_opening =
15075               $self->[_rstarting_multiline_qw_seqno_by_K_]->{$KK};
15076             if ($seqno_qw_opening) {
15077                 push @i_qw, $max_index_to_go;
15078             }
15079         }
15080
15081         # we need to save indentations of any unmatched opening tokens
15082         # in this batch because we may need them in a subsequent batch.
15083         foreach ( @unmatched_opening_indexes_in_this_batch, @i_qw ) {
15084
15085             my $seqno = $type_sequence_to_go[$_];
15086
15087             if ( !$seqno ) {
15088                 if ( $seqno_qw_opening && $_ == $max_index_to_go ) {
15089                     $seqno = $seqno_qw_opening;
15090                 }
15091                 else {
15092
15093                     # shouldn't happen
15094                     $seqno = 'UNKNOWN';
15095                 }
15096             }
15097
15098             $saved_opening_indentation{$seqno} = [
15099                 lookup_opening_indentation(
15100                     $_, $ri_first, $ri_last, $rindentation_list
15101                 )
15102             ];
15103         }
15104         return;
15105     } ## end sub save_opening_indentation
15106
15107     sub get_saved_opening_indentation {
15108         my ($seqno) = @_;
15109         my ( $indent, $offset, $is_leading, $exists ) = ( 0, 0, 0, 0 );
15110
15111         if ($seqno) {
15112             if ( $saved_opening_indentation{$seqno} ) {
15113                 ( $indent, $offset, $is_leading ) =
15114                   @{ $saved_opening_indentation{$seqno} };
15115                 $exists = 1;
15116             }
15117         }
15118
15119         # some kind of serious error it doesn't exist
15120         # (example is badfile.t)
15121
15122         return ( $indent, $offset, $is_leading, $exists );
15123     } ## end sub get_saved_opening_indentation
15124 } ## end closure grind_batch_of_CODE
15125
15126 sub lookup_opening_indentation {
15127
15128     # get the indentation of the line in the current output batch
15129     # which output a selected opening token
15130     #
15131     # given:
15132     #   $i_opening - index of an opening token in the current output batch
15133     #                whose line indentation we need
15134     #   $ri_first - reference to list of the first index $i for each output
15135     #               line in this batch
15136     #   $ri_last - reference to list of the last index $i for each output line
15137     #              in this batch
15138     #   $rindentation_list - reference to a list containing the indentation
15139     #            used for each line.  (NOTE: the first slot in
15140     #            this list is the last returned line number, and this is
15141     #            followed by the list of indentations).
15142     #
15143     # return
15144     #   -the indentation of the line which contained token $i_opening
15145     #   -and its offset (number of columns) from the start of the line
15146
15147     my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
15148
15149     if ( !@{$ri_last} ) {
15150
15151         # An error here implies a bug introduced by a recent program change.
15152         # Every batch of code has lines, so this should never happen.
15153         if (DEVEL_MODE) {
15154             Fault("Error in opening_indentation: no lines");
15155         }
15156         return ( 0, 0, 0 );
15157     }
15158
15159     my $nline = $rindentation_list->[0];    # line number of previous lookup
15160
15161     # reset line location if necessary
15162     $nline = 0 if ( $i_opening < $ri_start->[$nline] );
15163
15164     # find the correct line
15165     unless ( $i_opening > $ri_last->[-1] ) {
15166         while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
15167     }
15168
15169     # Error - token index is out of bounds - shouldn't happen
15170     # A program bug has been introduced in one of the calling routines.
15171     # We better stop here.
15172     else {
15173         my $i_last_line = $ri_last->[-1];
15174         if (DEVEL_MODE) {
15175             Fault(<<EOM);
15176 Program bug in call to lookup_opening_indentation - index out of range
15177  called with index i_opening=$i_opening  > $i_last_line = max index of last line
15178 This batch has max index = $max_index_to_go,
15179 EOM
15180         }
15181         $nline = $#{$ri_last};
15182     }
15183
15184     $rindentation_list->[0] =
15185       $nline;    # save line number to start looking next call
15186     my $ibeg       = $ri_start->[$nline];
15187     my $offset     = token_sequence_length( $ibeg, $i_opening ) - 1;
15188     my $is_leading = ( $ibeg == $i_opening );
15189     return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
15190 } ## end sub lookup_opening_indentation
15191
15192 sub terminal_type_i {
15193
15194     #  returns type of last token on this line (terminal token), as follows:
15195     #  returns # for a full-line comment
15196     #  returns ' ' for a blank line
15197     #  otherwise returns final token type
15198
15199     my ( $ibeg, $iend ) = @_;
15200
15201     # Start at the end and work backwards
15202     my $i      = $iend;
15203     my $type_i = $types_to_go[$i];
15204
15205     # Check for side comment
15206     if ( $type_i eq '#' ) {
15207         $i--;
15208         if ( $i < $ibeg ) {
15209             return wantarray ? ( $type_i, $ibeg ) : $type_i;
15210         }
15211         $type_i = $types_to_go[$i];
15212     }
15213
15214     # Skip past a blank
15215     if ( $type_i eq 'b' ) {
15216         $i--;
15217         if ( $i < $ibeg ) {
15218             return wantarray ? ( $type_i, $ibeg ) : $type_i;
15219         }
15220         $type_i = $types_to_go[$i];
15221     }
15222
15223     # Found it..make sure it is a BLOCK termination,
15224     # but hide a terminal } after sort/map/grep/eval/do because it is not
15225     # necessarily the end of the line.  (terminal.t)
15226     my $block_type = $block_type_to_go[$i];
15227     if (
15228         $type_i eq '}'
15229         && (  !$block_type
15230             || $is_sort_map_grep_eval_do{$block_type} )
15231       )
15232     {
15233         $type_i = 'b';
15234     }
15235     return wantarray ? ( $type_i, $i ) : $type_i;
15236 } ## end sub terminal_type_i
15237
15238 sub pad_array_to_go {
15239
15240     # To simplify coding in break_lists and set_bond_strengths, it helps to
15241     # create some extra blank tokens at the end of the arrays.  We also add
15242     # some undef's to help guard against using invalid data.
15243     my ($self) = @_;
15244     $K_to_go[ $max_index_to_go + 1 ]             = undef;
15245     $tokens_to_go[ $max_index_to_go + 1 ]        = EMPTY_STRING;
15246     $tokens_to_go[ $max_index_to_go + 2 ]        = EMPTY_STRING;
15247     $tokens_to_go[ $max_index_to_go + 3 ]        = undef;
15248     $types_to_go[ $max_index_to_go + 1 ]         = 'b';
15249     $types_to_go[ $max_index_to_go + 2 ]         = 'b';
15250     $types_to_go[ $max_index_to_go + 3 ]         = undef;
15251     $nesting_depth_to_go[ $max_index_to_go + 2 ] = undef;
15252     $nesting_depth_to_go[ $max_index_to_go + 1 ] =
15253       $nesting_depth_to_go[$max_index_to_go];
15254
15255     #    /^[R\}\)\]]$/
15256     if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
15257         if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
15258
15259             # Nesting depths are set to be >=0 in sub write_line, so it should
15260             # not be possible to get here unless the code has a bracing error
15261             # which leaves a closing brace with zero nesting depth.
15262             unless ( get_saw_brace_error() ) {
15263                 if (DEVEL_MODE) {
15264                     Fault(<<EOM);
15265 Program bug in pad_array_to_go: hit nesting error which should have been caught
15266 EOM
15267                 }
15268             }
15269         }
15270         else {
15271             $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
15272         }
15273     }
15274
15275     #       /^[L\{\(\[]$/
15276     elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
15277         $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
15278     }
15279     return;
15280 } ## end sub pad_array_to_go
15281
15282 sub break_all_chain_tokens {
15283
15284     # scan the current breakpoints looking for breaks at certain "chain
15285     # operators" (. : && || + etc) which often occur repeatedly in a long
15286     # statement.  If we see a break at any one, break at all similar tokens
15287     # within the same container.
15288     #
15289     my ( $self, $ri_left, $ri_right ) = @_;
15290
15291     my %saw_chain_type;
15292     my %left_chain_type;
15293     my %right_chain_type;
15294     my %interior_chain_type;
15295     my $nmax = @{$ri_right} - 1;
15296
15297     # scan the left and right end tokens of all lines
15298     my $count = 0;
15299     for my $n ( 0 .. $nmax ) {
15300         my $il    = $ri_left->[$n];
15301         my $ir    = $ri_right->[$n];
15302         my $typel = $types_to_go[$il];
15303         my $typer = $types_to_go[$ir];
15304         $typel = '+' if ( $typel eq '-' );    # treat + and - the same
15305         $typer = '+' if ( $typer eq '-' );
15306         $typel = '*' if ( $typel eq '/' );    # treat * and / the same
15307         $typer = '*' if ( $typer eq '/' );
15308
15309         my $keyl = $typel eq 'k' ? $tokens_to_go[$il] : $typel;
15310         my $keyr = $typer eq 'k' ? $tokens_to_go[$ir] : $typer;
15311         if ( $is_chain_operator{$keyl} && $want_break_before{$typel} ) {
15312             next if ( $typel eq '?' );
15313             push @{ $left_chain_type{$keyl} }, $il;
15314             $saw_chain_type{$keyl} = 1;
15315             $count++;
15316         }
15317         if ( $is_chain_operator{$keyr} && !$want_break_before{$typer} ) {
15318             next if ( $typer eq '?' );
15319             push @{ $right_chain_type{$keyr} }, $ir;
15320             $saw_chain_type{$keyr} = 1;
15321             $count++;
15322         }
15323     }
15324     return unless $count;
15325
15326     # now look for any interior tokens of the same types
15327     $count = 0;
15328     for my $n ( 0 .. $nmax ) {
15329         my $il = $ri_left->[$n];
15330         my $ir = $ri_right->[$n];
15331         foreach my $i ( $il + 1 .. $ir - 1 ) {
15332             my $type = $types_to_go[$i];
15333             my $key  = $type eq 'k' ? $tokens_to_go[$i] : $type;
15334             $key = '+' if ( $key eq '-' );
15335             $key = '*' if ( $key eq '/' );
15336             if ( $saw_chain_type{$key} ) {
15337                 push @{ $interior_chain_type{$key} }, $i;
15338                 $count++;
15339             }
15340         }
15341     }
15342     return unless $count;
15343
15344     # now make a list of all new break points
15345     my @insert_list;
15346
15347     # loop over all chain types
15348     foreach my $key ( keys %saw_chain_type ) {
15349
15350         # quit if just ONE continuation line with leading .  For example--
15351         # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
15352         #  . $contents;
15353         last if ( $nmax == 1 && $key =~ /^[\.\+]$/ );
15354
15355         # loop over all interior chain tokens
15356         foreach my $itest ( @{ $interior_chain_type{$key} } ) {
15357
15358             # loop over all left end tokens of same type
15359             if ( $left_chain_type{$key} ) {
15360                 next if $nobreak_to_go[ $itest - 1 ];
15361                 foreach my $i ( @{ $left_chain_type{$key} } ) {
15362                     next unless $self->in_same_container_i( $i, $itest );
15363                     push @insert_list, $itest - 1;
15364
15365                     # Break at matching ? if this : is at a different level.
15366                     # For example, the ? before $THRf_DEAD in the following
15367                     # should get a break if its : gets a break.
15368                     #
15369                     # my $flags =
15370                     #     ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
15371                     #   : ( $_ & 4 ) ? $THRf_R_DETACHED
15372                     #   :              $THRf_R_JOINABLE;
15373                     if (   $key eq ':'
15374                         && $levels_to_go[$i] != $levels_to_go[$itest] )
15375                     {
15376                         my $i_question = $mate_index_to_go[$itest];
15377                         if ( $i_question > 0 ) {
15378                             push @insert_list, $i_question - 1;
15379                         }
15380                     }
15381                     last;
15382                 }
15383             }
15384
15385             # loop over all right end tokens of same type
15386             if ( $right_chain_type{$key} ) {
15387                 next if $nobreak_to_go[$itest];
15388                 foreach my $i ( @{ $right_chain_type{$key} } ) {
15389                     next unless $self->in_same_container_i( $i, $itest );
15390                     push @insert_list, $itest;
15391
15392                     # break at matching ? if this : is at a different level
15393                     if (   $key eq ':'
15394                         && $levels_to_go[$i] != $levels_to_go[$itest] )
15395                     {
15396                         my $i_question = $mate_index_to_go[$itest];
15397                         if ( $i_question >= 0 ) {
15398                             push @insert_list, $i_question;
15399                         }
15400                     }
15401                     last;
15402                 }
15403             }
15404         }
15405     }
15406
15407     # insert any new break points
15408     if (@insert_list) {
15409         $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
15410     }
15411     return;
15412 } ## end sub break_all_chain_tokens
15413
15414 sub insert_additional_breaks {
15415
15416     # this routine will add line breaks at requested locations after
15417     # sub break_long_lines has made preliminary breaks.
15418
15419     my ( $self, $ri_break_list, $ri_first, $ri_last ) = @_;
15420     my $i_f;
15421     my $i_l;
15422     my $line_number = 0;
15423     foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
15424
15425         next if ( $nobreak_to_go[$i_break_left] );
15426
15427         $i_f = $ri_first->[$line_number];
15428         $i_l = $ri_last->[$line_number];
15429         while ( $i_break_left >= $i_l ) {
15430             $line_number++;
15431
15432             # shouldn't happen unless caller passes bad indexes
15433             if ( $line_number >= @{$ri_last} ) {
15434                 if (DEVEL_MODE) {
15435                     Fault(<<EOM);
15436 Non-fatal program bug: couldn't set break at $i_break_left
15437 EOM
15438                 }
15439                 return;
15440             }
15441             $i_f = $ri_first->[$line_number];
15442             $i_l = $ri_last->[$line_number];
15443         }
15444
15445         # Do not leave a blank at the end of a line; back up if necessary
15446         if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
15447
15448         my $i_break_right = $inext_to_go[$i_break_left];
15449         if (   $i_break_left >= $i_f
15450             && $i_break_left < $i_l
15451             && $i_break_right > $i_f
15452             && $i_break_right <= $i_l )
15453         {
15454             splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
15455             splice( @{$ri_last},  $line_number, 1, ( $i_break_left, $i_l ) );
15456         }
15457     }
15458     return;
15459 } ## end sub insert_additional_breaks
15460
15461 {    ## begin closure in_same_container_i
15462     my $ris_break_token;
15463     my $ris_comma_token;
15464
15465     BEGIN {
15466
15467         # all cases break on seeing commas at same level
15468         my @q = qw( => );
15469         push @q, ',';
15470         @{$ris_comma_token}{@q} = (1) x scalar(@q);
15471
15472         # Non-ternary text also breaks on seeing any of qw(? : || or )
15473         # Example: we would not want to break at any of these .'s
15474         #  : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
15475         push @q, qw( or || ? : );
15476         @{$ris_break_token}{@q} = (1) x scalar(@q);
15477     }
15478
15479     sub in_same_container_i {
15480
15481         # Check to see if tokens at i1 and i2 are in the same container, and
15482         # not separated by certain characters: => , ? : || or
15483         # This is an interface between the _to_go arrays to the rLL array
15484         my ( $self, $i1, $i2 ) = @_;
15485
15486         # quick check
15487         my $parent_seqno_1 = $parent_seqno_to_go[$i1];
15488         return if ( $parent_seqno_to_go[$i2] ne $parent_seqno_1 );
15489
15490         if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
15491         my $K1  = $K_to_go[$i1];
15492         my $K2  = $K_to_go[$i2];
15493         my $rLL = $self->[_rLL_];
15494
15495         my $depth_1 = $nesting_depth_to_go[$i1];
15496         return if ( $depth_1 < 0 );
15497
15498         # Shouldn't happen since i1 and i2 have same parent:
15499         return unless ( $nesting_depth_to_go[$i2] == $depth_1 );
15500
15501         # Select character set to scan for
15502         my $type_1 = $types_to_go[$i1];
15503         my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token;
15504
15505         # Fast preliminary loop to verify that tokens are in the same container
15506         my $KK = $K1;
15507         while (1) {
15508             $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
15509             last if !defined($KK);
15510             last if ( $KK >= $K2 );
15511             my $ii      = $i1 + $KK - $K1;
15512             my $depth_i = $nesting_depth_to_go[$ii];
15513             return if ( $depth_i < $depth_1 );
15514             next   if ( $depth_i > $depth_1 );
15515             if ( $type_1 ne ':' ) {
15516                 my $tok_i = $tokens_to_go[$ii];
15517                 return if ( $tok_i eq '?' || $tok_i eq ':' );
15518             }
15519         }
15520
15521         # Slow loop checking for certain characters
15522
15523         #-----------------------------------------------------
15524         # This is potentially a slow routine and not critical.
15525         # For safety just give up for large differences.
15526         # See test file 'infinite_loop.txt'
15527         #-----------------------------------------------------
15528         return if ( $i2 - $i1 > 200 );
15529
15530         foreach my $ii ( $i1 + 1 .. $i2 - 1 ) {
15531
15532             my $depth_i = $nesting_depth_to_go[$ii];
15533             next   if ( $depth_i > $depth_1 );
15534             return if ( $depth_i < $depth_1 );
15535             my $tok_i = $tokens_to_go[$ii];
15536             return if ( $rbreak->{$tok_i} );
15537         }
15538         return 1;
15539     } ## end sub in_same_container_i
15540 } ## end closure in_same_container_i
15541
15542 sub break_equals {
15543
15544     # Look for assignment operators that could use a breakpoint.
15545     # For example, in the following snippet
15546     #
15547     #    $HOME = $ENV{HOME}
15548     #      || $ENV{LOGDIR}
15549     #      || $pw[7]
15550     #      || die "no home directory for user $<";
15551     #
15552     # we could break at the = to get this, which is a little nicer:
15553     #    $HOME =
15554     #         $ENV{HOME}
15555     #      || $ENV{LOGDIR}
15556     #      || $pw[7]
15557     #      || die "no home directory for user $<";
15558     #
15559     # The logic here follows the logic in set_logical_padding, which
15560     # will add the padding in the second line to improve alignment.
15561     #
15562     my ( $self, $ri_left, $ri_right ) = @_;
15563     my $nmax = @{$ri_right} - 1;
15564     return unless ( $nmax >= 2 );
15565
15566     # scan the left ends of first two lines
15567     my $tokbeg = EMPTY_STRING;
15568     my $depth_beg;
15569     for my $n ( 1 .. 2 ) {
15570         my $il     = $ri_left->[$n];
15571         my $typel  = $types_to_go[$il];
15572         my $tokenl = $tokens_to_go[$il];
15573         my $keyl   = $typel eq 'k' ? $tokenl : $typel;
15574
15575         my $has_leading_op = $is_chain_operator{$keyl};
15576         return unless ($has_leading_op);
15577         if ( $n > 1 ) {
15578             return
15579               unless ( $tokenl eq $tokbeg
15580                 && $nesting_depth_to_go[$il] eq $depth_beg );
15581         }
15582         $tokbeg    = $tokenl;
15583         $depth_beg = $nesting_depth_to_go[$il];
15584     }
15585
15586     # now look for any interior tokens of the same types
15587     my $il = $ri_left->[0];
15588     my $ir = $ri_right->[0];
15589
15590     # now make a list of all new break points
15591     my @insert_list;
15592     foreach my $i ( reverse( $il + 1 .. $ir - 1 ) ) {
15593         my $type = $types_to_go[$i];
15594         if (   $is_assignment{$type}
15595             && $nesting_depth_to_go[$i] eq $depth_beg )
15596         {
15597             if ( $want_break_before{$type} ) {
15598                 push @insert_list, $i - 1;
15599             }
15600             else {
15601                 push @insert_list, $i;
15602             }
15603         }
15604     }
15605
15606     # Break after a 'return' followed by a chain of operators
15607     #  return ( $^O !~ /win32|dos/i )
15608     #    && ( $^O ne 'VMS' )
15609     #    && ( $^O ne 'OS2' )
15610     #    && ( $^O ne 'MacOS' );
15611     # To give:
15612     #  return
15613     #       ( $^O !~ /win32|dos/i )
15614     #    && ( $^O ne 'VMS' )
15615     #    && ( $^O ne 'OS2' )
15616     #    && ( $^O ne 'MacOS' );
15617     my $i = 0;
15618     if (   $types_to_go[$i] eq 'k'
15619         && $tokens_to_go[$i] eq 'return'
15620         && $ir > $il
15621         && $nesting_depth_to_go[$i] eq $depth_beg )
15622     {
15623         push @insert_list, $i;
15624     }
15625
15626     return unless (@insert_list);
15627
15628     # One final check...
15629     # scan second and third lines and be sure there are no assignments
15630     # we want to avoid breaking at an = to make something like this:
15631     #    unless ( $icon =
15632     #           $html_icons{"$type-$state"}
15633     #        or $icon = $html_icons{$type}
15634     #        or $icon = $html_icons{$state} )
15635     for my $n ( 1 .. 2 ) {
15636         my $il_n = $ri_left->[$n];
15637         my $ir_n = $ri_right->[$n];
15638         foreach my $i ( $il_n + 1 .. $ir_n ) {
15639             my $type = $types_to_go[$i];
15640             return
15641               if ( $is_assignment{$type}
15642                 && $nesting_depth_to_go[$i] eq $depth_beg );
15643         }
15644     }
15645
15646     # ok, insert any new break point
15647     if (@insert_list) {
15648         $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
15649     }
15650     return;
15651 } ## end sub break_equals
15652
15653 {    ## begin closure recombine_breakpoints
15654
15655     # This routine is called once per batch to see if it would be better
15656     # to combine some of the lines into which the batch has been broken.
15657
15658     my %is_amp_amp;
15659     my %is_math_op;
15660     my %is_plus_minus;
15661     my %is_mult_div;
15662
15663     BEGIN {
15664
15665         my @q;
15666         @q = qw( && || );
15667         @is_amp_amp{@q} = (1) x scalar(@q);
15668
15669         @q = qw( + - * / );
15670         @is_math_op{@q} = (1) x scalar(@q);
15671
15672         @q = qw( + - );
15673         @is_plus_minus{@q} = (1) x scalar(@q);
15674
15675         @q = qw( * / );
15676         @is_mult_div{@q} = (1) x scalar(@q);
15677     }
15678
15679     sub Debug_dump_breakpoints {
15680
15681         # Debug routine to dump current breakpoints...not normally called
15682         # We are given indexes to the current lines:
15683         # $ri_beg = ref to array of BEGinning indexes of each line
15684         # $ri_end = ref to array of ENDing indexes of each line
15685         my ( $self, $ri_beg, $ri_end, $msg ) = @_;
15686         print STDERR "----Dumping breakpoints from: $msg----\n";
15687         for my $n ( 0 .. @{$ri_end} - 1 ) {
15688             my $ibeg = $ri_beg->[$n];
15689             my $iend = $ri_end->[$n];
15690             my $text = EMPTY_STRING;
15691             foreach my $i ( $ibeg .. $iend ) {
15692                 $text .= $tokens_to_go[$i];
15693             }
15694             print STDERR "$n ($ibeg:$iend) $text\n";
15695         }
15696         print STDERR "----\n";
15697         return;
15698     } ## end sub Debug_dump_breakpoints
15699
15700     sub delete_one_line_semicolons {
15701
15702         my ( $self, $ri_beg, $ri_end ) = @_;
15703         my $rLL                 = $self->[_rLL_];
15704         my $K_opening_container = $self->[_K_opening_container_];
15705
15706         # Walk down the lines of this batch and delete any semicolons
15707         # terminating one-line blocks;
15708         my $nmax = @{$ri_end} - 1;
15709
15710         foreach my $n ( 0 .. $nmax ) {
15711             my $i_beg    = $ri_beg->[$n];
15712             my $i_e      = $ri_end->[$n];
15713             my $K_beg    = $K_to_go[$i_beg];
15714             my $K_e      = $K_to_go[$i_e];
15715             my $K_end    = $K_e;
15716             my $type_end = $rLL->[$K_end]->[_TYPE_];
15717             if ( $type_end eq '#' ) {
15718                 $K_end = $self->K_previous_nonblank($K_end);
15719                 if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; }
15720             }
15721
15722             # we are looking for a line ending in closing brace
15723             next
15724               unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' );
15725
15726             # ...and preceded by a semicolon on the same line
15727             my $K_semicolon = $self->K_previous_nonblank($K_end);
15728             next unless defined($K_semicolon);
15729             my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg );
15730             next if ( $i_semicolon <= $i_beg );
15731             next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' );
15732
15733             # Safety check - shouldn't happen - not critical
15734             # This is not worth throwing a Fault, except in DEVEL_MODE
15735             if ( $types_to_go[$i_semicolon] ne ';' ) {
15736                 DEVEL_MODE
15737                   && Fault("unexpected type looking for semicolon");
15738                 next;
15739             }
15740
15741             # ... with the corresponding opening brace on the same line
15742             my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_];
15743             my $K_opening     = $K_opening_container->{$type_sequence};
15744             next unless ( defined($K_opening) );
15745             my $i_opening = $i_beg + ( $K_opening - $K_beg );
15746             next if ( $i_opening < $i_beg );
15747
15748             # ... and only one semicolon between these braces
15749             my $semicolon_count = 0;
15750             foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) {
15751                 if ( $rLL->[$K]->[_TYPE_] eq ';' ) {
15752                     $semicolon_count++;
15753                     last;
15754                 }
15755             }
15756             next if ($semicolon_count);
15757
15758             # ...ok, then make the semicolon invisible
15759             my $len = $token_lengths_to_go[$i_semicolon];
15760             $tokens_to_go[$i_semicolon]            = EMPTY_STRING;
15761             $token_lengths_to_go[$i_semicolon]     = 0;
15762             $rLL->[$K_semicolon]->[_TOKEN_]        = EMPTY_STRING;
15763             $rLL->[$K_semicolon]->[_TOKEN_LENGTH_] = 0;
15764             foreach ( $i_semicolon .. $max_index_to_go ) {
15765                 $summed_lengths_to_go[ $_ + 1 ] -= $len;
15766             }
15767         }
15768         return;
15769     } ## end sub delete_one_line_semicolons
15770
15771     use constant DEBUG_RECOMBINE => 0;
15772
15773     sub recombine_breakpoints {
15774
15775         # We are given indexes to the current lines:
15776         #  $ri_beg = ref to array of BEGinning indexes of each line
15777         #  $ri_end = ref to array of ENDing indexes of each line
15778         my ( $self, $ri_beg, $ri_end, $rbond_strength_to_go ) = @_;
15779
15780         # sub break_long_lines is very liberal in setting line breaks
15781         # for long lines, always setting breaks at good breakpoints, even
15782         # when that creates small lines.  Sometimes small line fragments
15783         # are produced which would look better if they were combined.
15784         # That's the task of this routine.
15785
15786         # do nothing under extreme stress
15787         return if ( $stress_level_alpha < 1 && !DEVEL_MODE );
15788
15789         my $rK_weld_right = $self->[_rK_weld_right_];
15790         my $rK_weld_left  = $self->[_rK_weld_left_];
15791
15792         my $nmax_start = @{$ri_end} - 1;
15793         return if ( $nmax_start <= 0 );
15794
15795         # Make a list of all good joining tokens between the lines
15796         # n-1 and n.
15797         my @joint;
15798
15799         # Break the total batch sub-sections with lengths short enough to
15800         # recombine
15801         my $rsections = [];
15802         my $nbeg_sec  = 0;
15803         my $nend_sec;
15804         my $nmax_section = 0;
15805         foreach my $nn ( 1 .. $nmax_start ) {
15806             my $ibeg_1 = $ri_beg->[ $nn - 1 ];
15807             my $iend_1 = $ri_end->[ $nn - 1 ];
15808             my $iend_2 = $ri_end->[$nn];
15809             my $ibeg_2 = $ri_beg->[$nn];
15810
15811             # Define the joint variable
15812             my ( $itok, $itokp, $itokm );
15813             foreach my $itest ( $iend_1, $ibeg_2 ) {
15814                 my $type = $types_to_go[$itest];
15815                 if (   $is_math_op{$type}
15816                     || $is_amp_amp{$type}
15817                     || $is_assignment{$type}
15818                     || $type eq ':' )
15819                 {
15820                     $itok = $itest;
15821                 }
15822             }
15823             $joint[$nn] = [$itok];
15824
15825             # Update the section list
15826             my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 );
15827             if (
15828                 $excess <= 1
15829
15830                 # The number 5 here is an arbitrary small number intended
15831                 # to keep most small matches in one sub-section.
15832                 || ( defined($nend_sec)
15833                     && ( $nn < 5 || $nmax_start - $nn < 5 ) )
15834               )
15835             {
15836                 $nend_sec = $nn;
15837             }
15838             else {
15839                 if ( defined($nend_sec) ) {
15840                     push @{$rsections}, [ $nbeg_sec, $nend_sec ];
15841                     my $num = $nend_sec - $nbeg_sec;
15842                     if ( $num > $nmax_section ) { $nmax_section = $num }
15843                     $nbeg_sec = $nn;
15844                     $nend_sec = undef;
15845                 }
15846                 $nbeg_sec = $nn;
15847             }
15848         }
15849         if ( defined($nend_sec) ) {
15850             push @{$rsections}, [ $nbeg_sec, $nend_sec ];
15851             my $num = $nend_sec - $nbeg_sec;
15852             if ( $num > $nmax_section ) { $nmax_section = $num }
15853         }
15854
15855         my $num_sections = @{$rsections};
15856
15857         # This is potentially an O(n-squared) loop, but not critical, so we can
15858         # put a finite limit on the total number of iterations. This is
15859         # suggested by issue c118, which pushed about 5.e5 lines through here
15860         # and caused an excessive run time.
15861
15862         # Three lines of defense have been put in place to prevent excessive
15863         # run times:
15864         #  1. do nothing if formatting under stress (c118 was under stress)
15865         #  2. break into small sub-sections to decrease the maximum n-squared.
15866         #  3. put a finite limit on the number of iterations.
15867
15868         # Testing shows that most batches only require one or two iterations.
15869         # A very large batch which is broken into sub-sections can require one
15870         # iteration per section.  This suggests the limit here, which allows
15871         # up to 10 iterations plus one pass per sub-section.
15872         my $it_count = 0;
15873         my $it_count_max =
15874           10 + int( 1000 / ( 1 + $nmax_section ) ) + $num_sections;
15875
15876         if ( DEBUG_RECOMBINE > 1 ) {
15877             my $max = 0;
15878             print STDERR
15879               "-----\n$num_sections sections found for nmax=$nmax_start\n";
15880             foreach my $sect ( @{$rsections} ) {
15881                 my ( $nbeg, $nend ) = @{$sect};
15882                 my $num = $nend - $nbeg;
15883                 if ( $num > $max ) { $max = $num }
15884                 print STDERR "$nbeg $nend\n";
15885             }
15886             print STDERR "max size=$max of $nmax_start lines\n";
15887         }
15888
15889         # Loop over all sub-sections.  Note that we have to work backwards
15890         # from the end of the batch since the sections use original line
15891         # numbers, and the line numbers change as we go.
15892         while ( my $section = pop @{$rsections} ) {
15893             my ( $nbeg, $nend ) = @{$section};
15894
15895             # number of ending lines to leave untouched in this pass
15896             my $nmax_sec   = @{$ri_end} - 1;
15897             my $num_freeze = $nmax_sec - $nend;
15898
15899             my $more_to_do = 1;
15900
15901             # We keep looping over all of the lines of this batch
15902             # until there are no more possible recombinations
15903             my $nmax_last = $nmax_sec + 1;
15904             my $reverse   = 0;
15905
15906             while ($more_to_do) {
15907
15908                 # Safety check for excess total iterations
15909                 $it_count++;
15910                 if ( $it_count > $it_count_max ) {
15911                     goto RETURN;
15912                 }
15913
15914                 my $n_best = 0;
15915                 my $bs_best;
15916                 my $nmax = @{$ri_end} - 1;
15917
15918                 # Safety check for infinite loop: the line count must decrease
15919                 unless ( $nmax < $nmax_last ) {
15920
15921                     # Shouldn't happen because splice below decreases nmax on
15922                     # each iteration.  An error can only be due to a recent
15923                     # programming change.  We better stop here.
15924                     if (DEVEL_MODE) {
15925                         Fault(
15926 "Program bug-infinite loop in recombine breakpoints\n"
15927                         );
15928                     }
15929                     $more_to_do = 0;
15930                     last;
15931                 }
15932                 $nmax_last  = $nmax;
15933                 $more_to_do = 0;
15934                 my $skip_Section_3;
15935                 my $leading_amp_count = 0;
15936                 my $this_line_is_semicolon_terminated;
15937
15938                 # loop over all remaining lines in this batch
15939                 my $nstop = $nmax - $num_freeze;
15940                 for my $iter ( $nbeg + 1 .. $nstop ) {
15941
15942                     # alternating sweep direction gives symmetric results
15943                     # for recombining lines which exceed the line length
15944                     # such as eval {{{{.... }}}}
15945                     my $n;
15946                     if   ($reverse) { $n = $nbeg + 1 + $nstop - $iter; }
15947                     else            { $n = $iter }
15948
15949                     #----------------------------------------------------------
15950                     # If we join the current pair of lines,
15951                     # line $n-1 will become the left part of the joined line
15952                     # line $n will become the right part of the joined line
15953                     #
15954                     # Here are Indexes of the endpoint tokens of the two lines:
15955                     #
15956                     #  -----line $n-1--- | -----line $n-----
15957                     #  $ibeg_1   $iend_1 | $ibeg_2   $iend_2
15958                     #                    ^
15959                     #                    |
15960                     # We want to decide if we should remove the line break
15961                     # between the tokens at $iend_1 and $ibeg_2
15962                     #
15963                     # We will apply a number of ad-hoc tests to see if joining
15964                     # here will look ok.  The code will just issue a 'next'
15965                     # command if the join doesn't look good.  If we get through
15966                     # the gauntlet of tests, the lines will be recombined.
15967                     #----------------------------------------------------------
15968                     #
15969                     # beginning and ending tokens of the lines we are working on
15970                     my $ibeg_1    = $ri_beg->[ $n - 1 ];
15971                     my $iend_1    = $ri_end->[ $n - 1 ];
15972                     my $iend_2    = $ri_end->[$n];
15973                     my $ibeg_2    = $ri_beg->[$n];
15974                     my $ibeg_nmax = $ri_beg->[$nmax];
15975
15976                     # combined line cannot be too long
15977                     my $excess =
15978                       $self->excess_line_length( $ibeg_1, $iend_2, 1 );
15979                     next if ( $excess > 0 );
15980
15981                     my $type_iend_1 = $types_to_go[$iend_1];
15982                     my $type_iend_2 = $types_to_go[$iend_2];
15983                     my $type_ibeg_1 = $types_to_go[$ibeg_1];
15984                     my $type_ibeg_2 = $types_to_go[$ibeg_2];
15985
15986                     # terminal token of line 2 if any side comment is ignored:
15987                     my $iend_2t      = $iend_2;
15988                     my $type_iend_2t = $type_iend_2;
15989
15990                     # some beginning indexes of other lines, which may not exist
15991                     my $ibeg_0 = $n > 1          ? $ri_beg->[ $n - 2 ] : -1;
15992                     my $ibeg_3 = $n < $nmax      ? $ri_beg->[ $n + 1 ] : -1;
15993                     my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
15994
15995                     my $bs_tweak = 0;
15996
15997                     #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
15998                     #        $nesting_depth_to_go[$ibeg_1] );
15999
16000                     DEBUG_RECOMBINE > 1 && do {
16001                         print STDERR
16002 "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
16003                     };
16004
16005                     # If line $n is the last line, we set some flags and
16006                     # do any special checks for it
16007                     if ( $n == $nmax ) {
16008
16009                         # a terminal '{' should stay where it is
16010                         # unless preceded by a fat comma
16011                         next if ( $type_ibeg_2 eq '{' && $type_iend_1 ne '=>' );
16012
16013                         if (   $type_iend_2 eq '#'
16014                             && $iend_2 - $ibeg_2 >= 2
16015                             && $types_to_go[ $iend_2 - 1 ] eq 'b' )
16016                         {
16017                             $iend_2t      = $iend_2 - 2;
16018                             $type_iend_2t = $types_to_go[$iend_2t];
16019                         }
16020
16021                         $this_line_is_semicolon_terminated =
16022                           $type_iend_2t eq ';';
16023                     }
16024
16025                     #----------------------------------------------------------
16026                     # Recombine Section 0:
16027                     # Examine the special token joining this line pair, if any.
16028                     # Put as many tests in this section to avoid duplicate code
16029                     # and to make formatting independent of whether breaks are
16030                     # to the left or right of an operator.
16031                     #----------------------------------------------------------
16032
16033                     my ($itok) = @{ $joint[$n] };
16034                     if ($itok) {
16035
16036                         my $type = $types_to_go[$itok];
16037
16038                         if ( $type eq ':' ) {
16039
16040                             # do not join at a colon unless it disobeys the
16041                             # break request
16042                             if ( $itok eq $iend_1 ) {
16043                                 next unless $want_break_before{$type};
16044                             }
16045                             else {
16046                                 $leading_amp_count++;
16047                                 next if $want_break_before{$type};
16048                             }
16049                         } ## end if ':'
16050
16051                         # handle math operators + - * /
16052                         elsif ( $is_math_op{$type} ) {
16053
16054                             # Combine these lines if this line is a single
16055                             # number, or if it is a short term with same
16056                             # operator as the previous line.  For example, in
16057                             # the following code we will combine all of the
16058                             # short terms $A, $B, $C, $D, $E, $F, together
16059                             # instead of leaving them one per line:
16060                             #  my $time =
16061                             #    $A * $B * $C * $D * $E * $F *
16062                             #    ( 2. * $eps * $sigma * $area ) *
16063                             #    ( 1. / $tcold**3 - 1. / $thot**3 );
16064
16065                             # This can be important in math-intensive code.
16066
16067                             my $good_combo;
16068
16069                             my $itokp  = min( $inext_to_go[$itok],  $iend_2 );
16070                             my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
16071                             my $itokm  = max( $iprev_to_go[$itok],  $ibeg_1 );
16072                             my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
16073
16074                             # check for a number on the right
16075                             if ( $types_to_go[$itokp] eq 'n' ) {
16076
16077                                 # ok if nothing else on right
16078                                 if ( $itokp == $iend_2 ) {
16079                                     $good_combo = 1;
16080                                 }
16081                                 else {
16082
16083                                     # look one more token to right..
16084                                     # okay if math operator or some termination
16085                                     $good_combo =
16086                                       ( ( $itokpp == $iend_2 )
16087                                           && $is_math_op{ $types_to_go[$itokpp]
16088                                           } )
16089                                       || $types_to_go[$itokpp] =~ /^[#,;]$/;
16090                                 }
16091                             }
16092
16093                             # check for a number on the left
16094                             if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
16095
16096                                 # okay if nothing else to left
16097                                 if ( $itokm == $ibeg_1 ) {
16098                                     $good_combo = 1;
16099                                 }
16100
16101                                 # otherwise look one more token to left
16102                                 else {
16103
16104                                    # okay if math operator, comma, or assignment
16105                                     $good_combo = ( $itokmm == $ibeg_1 )
16106                                       && ( $is_math_op{ $types_to_go[$itokmm] }
16107                                         || $types_to_go[$itokmm] =~ /^[,]$/
16108                                         || $is_assignment{ $types_to_go[$itokmm]
16109                                         } );
16110                                 }
16111                             }
16112
16113                             # look for a single short token either side of the
16114                             # operator
16115                             if ( !$good_combo ) {
16116
16117                                 # Slight adjustment factor to make results
16118                                 # independent of break before or after operator
16119                                 # in long summed lists.  (An operator and a
16120                                 # space make two spaces).
16121                                 my $two = ( $itok eq $iend_1 ) ? 2 : 0;
16122
16123                                 $good_combo =
16124
16125                                   # numbers or id's on both sides of this joint
16126                                   $types_to_go[$itokp] =~ /^[in]$/
16127                                   && $types_to_go[$itokm] =~ /^[in]$/
16128
16129                                   # one of the two lines must be short:
16130                                   && (
16131                                     (
16132                                         # no more than 2 nonblank tokens right
16133                                         # of joint
16134                                         $itokpp == $iend_2
16135
16136                                         # short
16137                                         && token_sequence_length(
16138                                             $itokp, $iend_2
16139                                         ) < $two +
16140                                         $rOpts_short_concatenation_item_length
16141                                     )
16142                                     || (
16143                                         # no more than 2 nonblank tokens left of
16144                                         # joint
16145                                         $itokmm == $ibeg_1
16146
16147                                         # short
16148                                         && token_sequence_length(
16149                                             $ibeg_1, $itokm
16150                                         ) < 2 - $two +
16151                                         $rOpts_short_concatenation_item_length
16152                                     )
16153
16154                                   )
16155
16156                                   # keep pure terms; don't mix +- with */
16157                                   && !(
16158                                     $is_plus_minus{$type}
16159                                     && (   $is_mult_div{ $types_to_go[$itokmm] }
16160                                         || $is_mult_div{ $types_to_go[$itokpp] }
16161                                     )
16162                                   )
16163                                   && !(
16164                                     $is_mult_div{$type}
16165                                     && ( $is_plus_minus{ $types_to_go[$itokmm] }
16166                                         || $is_plus_minus{ $types_to_go[$itokpp]
16167                                         } )
16168                                   )
16169
16170                                   ;
16171                             }
16172
16173                             # it is also good to combine if we can reduce to 2
16174                             # lines
16175                             if ( !$good_combo ) {
16176
16177                                 # index on other line where same token would be
16178                                 # in a long chain.
16179                                 my $iother =
16180                                   ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
16181
16182                                 $good_combo =
16183                                      $n == 2
16184                                   && $n == $nmax
16185                                   && $types_to_go[$iother] ne $type;
16186                             }
16187
16188                             next unless ($good_combo);
16189
16190                         } ## end math
16191
16192                         elsif ( $is_amp_amp{$type} ) {
16193                             ##TBD
16194                         } ## end &&, ||
16195
16196                         elsif ( $is_assignment{$type} ) {
16197                             ##TBD
16198                         } ## end assignment
16199                     }
16200
16201                     #----------------------------------------------------------
16202                     # Recombine Section 1:
16203                     # Join welded nested containers immediately
16204                     #----------------------------------------------------------
16205
16206                     if (
16207                         $total_weld_count
16208                         && ( $type_sequence_to_go[$iend_1]
16209                             && defined( $rK_weld_right->{ $K_to_go[$iend_1] } )
16210                             || $type_sequence_to_go[$ibeg_2]
16211                             && defined( $rK_weld_left->{ $K_to_go[$ibeg_2] } ) )
16212                       )
16213                     {
16214                         $n_best = $n;
16215                         last;
16216                     }
16217
16218                     $reverse = 0;
16219
16220                     #----------------------------------------------------------
16221                     # Recombine Section 2:
16222                     # Examine token at $iend_1 (right end of first line of pair)
16223                     #----------------------------------------------------------
16224
16225                     # an isolated '}' may join with a ';' terminated segment
16226                     if ( $type_iend_1 eq '}' ) {
16227
16228                     # Check for cases where combining a semicolon terminated
16229                     # statement with a previous isolated closing paren will
16230                     # allow the combined line to be outdented.  This is
16231                     # generally a good move.  For example, we can join up
16232                     # the last two lines here:
16233                     #  (
16234                     #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
16235                     #      $size, $atime, $mtime, $ctime, $blksize, $blocks
16236                     #    )
16237                     #    = stat($file);
16238                     #
16239                     # to get:
16240                     #  (
16241                     #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
16242                     #      $size, $atime, $mtime, $ctime, $blksize, $blocks
16243                     #  ) = stat($file);
16244                     #
16245                     # which makes the parens line up.
16246                     #
16247                     # Another example, from Joe Matarazzo, probably looks best
16248                     # with the 'or' clause appended to the trailing paren:
16249                     #  $self->some_method(
16250                     #      PARAM1 => 'foo',
16251                     #      PARAM2 => 'bar'
16252                     #  ) or die "Some_method didn't work";
16253                     #
16254                     # But we do not want to do this for something like the -lp
16255                     # option where the paren is not outdentable because the
16256                     # trailing clause will be far to the right.
16257                     #
16258                     # The logic here is synchronized with the logic in sub
16259                     # sub final_indentation_adjustment, which actually does
16260                     # the outdenting.
16261                     #
16262                         $skip_Section_3 ||= $this_line_is_semicolon_terminated
16263
16264                           # only one token on last line
16265                           && $ibeg_1 == $iend_1
16266
16267                           # must be structural paren
16268                           && $tokens_to_go[$iend_1] eq ')'
16269
16270                           # style must allow outdenting,
16271                           && !$closing_token_indentation{')'}
16272
16273                           # only leading '&&', '||', and ':' if no others seen
16274                           # (but note: our count made below could be wrong
16275                           # due to intervening comments)
16276                           && ( $leading_amp_count == 0
16277                             || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
16278
16279                           # but leading colons probably line up with a
16280                           # previous colon or question (count could be wrong).
16281                           && $type_ibeg_2 ne ':'
16282
16283                           # only one step in depth allowed.  this line must not
16284                           # begin with a ')' itself.
16285                           && ( $nesting_depth_to_go[$iend_1] ==
16286                             $nesting_depth_to_go[$iend_2] + 1 );
16287
16288                         # YVES patch 2 of 2:
16289                         # Allow cuddled eval chains, like this:
16290                         #   eval {
16291                         #       #STUFF;
16292                         #       1; # return true
16293                         #   } or do {
16294                         #       #handle error
16295                         #   };
16296                         # This patch works together with a patch in
16297                         # setting adjusted indentation (where the closing eval
16298                         # brace is outdented if possible).
16299                         # The problem is that an 'eval' block has continuation
16300                         # indentation and it looks better to undo it in some
16301                         # cases.  If we do not use this patch we would get:
16302                         #   eval {
16303                         #       #STUFF;
16304                         #       1; # return true
16305                         #       }
16306                         #       or do {
16307                         #       #handle error
16308                         #     };
16309                         # The alternative, for uncuddled style, is to create
16310                         # a patch in final_indentation_adjustment which undoes
16311                         # the indentation of a leading line like 'or do {'.
16312                         # This doesn't work well with -icb through
16313                         if (
16314                                $block_type_to_go[$iend_1] eq 'eval'
16315                             && !ref( $leading_spaces_to_go[$iend_1] )
16316                             && !$rOpts_indent_closing_brace
16317                             && $tokens_to_go[$iend_2] eq '{'
16318                             && (
16319                                 ( $type_ibeg_2 =~ /^(\&\&|\|\|)$/ )
16320                                 || (   $type_ibeg_2 eq 'k'
16321                                     && $is_and_or{ $tokens_to_go[$ibeg_2] } )
16322                                 || $is_if_unless{ $tokens_to_go[$ibeg_2] }
16323                             )
16324                           )
16325                         {
16326                             $skip_Section_3 ||= 1;
16327                         }
16328
16329                         next
16330                           unless (
16331                             $skip_Section_3
16332
16333                             # handle '.' and '?' specially below
16334                             || ( $type_ibeg_2 =~ /^[\.\?]$/ )
16335
16336                             # fix for c054 (unusual -pbp case)
16337                             || $type_ibeg_2 eq '=='
16338
16339                           );
16340                     }
16341
16342                     elsif ( $type_iend_1 eq '{' ) {
16343
16344                         # YVES
16345                         # honor breaks at opening brace
16346                         # Added to prevent recombining something like this:
16347                         #  } || eval { package main;
16348                         next if $forced_breakpoint_to_go[$iend_1];
16349                     }
16350
16351                     # do not recombine lines with ending &&, ||,
16352                     elsif ( $is_amp_amp{$type_iend_1} ) {
16353                         next unless $want_break_before{$type_iend_1};
16354                     }
16355
16356                     # Identify and recombine a broken ?/: chain
16357                     elsif ( $type_iend_1 eq '?' ) {
16358
16359                         # Do not recombine different levels
16360                         next
16361                           if (
16362                             $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
16363
16364                         # do not recombine unless next line ends in :
16365                         next unless $type_iend_2 eq ':';
16366                     }
16367
16368                     # for lines ending in a comma...
16369                     elsif ( $type_iend_1 eq ',' ) {
16370
16371                         # Do not recombine at comma which is following the
16372                         # input bias.
16373                         # TODO: might be best to make a special flag
16374                         next if ( $old_breakpoint_to_go[$iend_1] );
16375
16376                         # An isolated '},' may join with an identifier + ';'
16377                         # This is useful for the class of a 'bless' statement
16378                         # (bless.t)
16379                         if (   $type_ibeg_1 eq '}'
16380                             && $type_ibeg_2 eq 'i' )
16381                         {
16382                             next
16383                               unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
16384                                 && ( $iend_2 == ( $ibeg_2 + 1 ) )
16385                                 && $this_line_is_semicolon_terminated );
16386
16387                             # override breakpoint
16388                             $forced_breakpoint_to_go[$iend_1] = 0;
16389                         }
16390
16391                         # but otherwise ..
16392                         else {
16393
16394                             # do not recombine after a comma unless this will
16395                             # leave just 1 more line
16396                             next unless ( $n + 1 >= $nmax );
16397
16398                             # do not recombine if there is a change in
16399                             # indentation depth
16400                             next
16401                               if ( $levels_to_go[$iend_1] !=
16402                                 $levels_to_go[$iend_2] );
16403
16404                             # do not recombine a "complex expression" after a
16405                             # comma.  "complex" means no parens.
16406                             my $saw_paren;
16407                             foreach my $ii ( $ibeg_2 .. $iend_2 ) {
16408                                 if ( $tokens_to_go[$ii] eq '(' ) {
16409                                     $saw_paren = 1;
16410                                     last;
16411                                 }
16412                             }
16413                             next if $saw_paren;
16414                         }
16415                     }
16416
16417                     # opening paren..
16418                     elsif ( $type_iend_1 eq '(' ) {
16419
16420                         # No longer doing this
16421                     }
16422
16423                     elsif ( $type_iend_1 eq ')' ) {
16424
16425                         # No longer doing this
16426                     }
16427
16428                     # keep a terminal for-semicolon
16429                     elsif ( $type_iend_1 eq 'f' ) {
16430                         next;
16431                     }
16432
16433                     # if '=' at end of line ...
16434                     elsif ( $is_assignment{$type_iend_1} ) {
16435
16436                         # keep break after = if it was in input stream
16437                         # this helps prevent 'blinkers'
16438                         next
16439                           if (
16440                             $old_breakpoint_to_go[$iend_1]
16441
16442                             # don't strand an isolated '='
16443                             && $iend_1 != $ibeg_1
16444                           );
16445
16446                         my $is_short_quote =
16447                           (      $type_ibeg_2 eq 'Q'
16448                               && $ibeg_2 == $iend_2
16449                               && token_sequence_length( $ibeg_2, $ibeg_2 ) <
16450                               $rOpts_short_concatenation_item_length );
16451                         my $is_ternary = (
16452                             $type_ibeg_1 eq '?' && ( $ibeg_3 >= 0
16453                                 && $types_to_go[$ibeg_3] eq ':' )
16454                         );
16455
16456                         # always join an isolated '=', a short quote, or if this
16457                         # will put ?/: at start of adjacent lines
16458                         if (   $ibeg_1 != $iend_1
16459                             && !$is_short_quote
16460                             && !$is_ternary )
16461                         {
16462                             next
16463                               unless (
16464                                 (
16465
16466                                     # unless we can reduce this to two lines
16467                                     $nmax < $n + 2
16468
16469                                     # or three lines, the last with a leading
16470                                     # semicolon
16471                                     || (   $nmax == $n + 2
16472                                         && $types_to_go[$ibeg_nmax] eq ';' )
16473
16474                                     # or the next line ends with a here doc
16475                                     || $type_iend_2 eq 'h'
16476
16477                                     # or the next line ends in an open paren or
16478                                     # brace and the break hasn't been forced
16479                                     # [dima.t]
16480                                     || (  !$forced_breakpoint_to_go[$iend_1]
16481                                         && $type_iend_2 eq '{' )
16482                                 )
16483
16484                                 # do not recombine if the two lines might align
16485                                 # well this is a very approximate test for this
16486                                 && (
16487
16488                                     # RT#127633 - the leading tokens are not
16489                                     # operators
16490                                     ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
16491
16492                                     # or they are different
16493                                     || (   $ibeg_3 >= 0
16494                                         && $type_ibeg_2 ne
16495                                         $types_to_go[$ibeg_3] )
16496                                 )
16497                               );
16498
16499                             if (
16500
16501                                 # Recombine if we can make two lines
16502                                 $nmax >= $n + 2
16503
16504                                 # -lp users often prefer this:
16505                                 #  my $title = function($env, $env, $sysarea,
16506                                 #                       "bubba Borrower Entry");
16507                                 #  so we will recombine if -lp is used we have
16508                                 #  ending comma
16509                                 && !(
16510                                        $ibeg_3 > 0
16511                                     && ref( $leading_spaces_to_go[$ibeg_3] )
16512                                     && $type_iend_2 eq ','
16513                                 )
16514                               )
16515                             {
16516
16517                                 # otherwise, scan the rhs line up to last token
16518                                 # for complexity.  Note that we are not
16519                                 # counting the last token in case it is an
16520                                 # opening paren.
16521                                 my $tv    = 0;
16522                                 my $depth = $nesting_depth_to_go[$ibeg_2];
16523                                 foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
16524                                     if ( $nesting_depth_to_go[$i] != $depth ) {
16525                                         $tv++;
16526                                         last if ( $tv > 1 );
16527                                     }
16528                                     $depth = $nesting_depth_to_go[$i];
16529                                 }
16530
16531                                 # ok to recombine if no level changes before
16532                                 # last token
16533                                 if ( $tv > 0 ) {
16534
16535                                     # otherwise, do not recombine if more than
16536                                     # two level changes.
16537                                     next if ( $tv > 1 );
16538
16539                                     # check total complexity of the two
16540                                     # adjacent lines that will occur if we do
16541                                     # this join
16542                                     my $istop =
16543                                       ( $n < $nmax )
16544                                       ? $ri_end->[ $n + 1 ]
16545                                       : $iend_2;
16546                                     foreach my $i ( $iend_2 .. $istop ) {
16547                                         if (
16548                                             $nesting_depth_to_go[$i] != $depth )
16549                                         {
16550                                             $tv++;
16551                                             last if ( $tv > 2 );
16552                                         }
16553                                         $depth = $nesting_depth_to_go[$i];
16554                                     }
16555
16556                                     # do not recombine if total is more than 2
16557                                     # level changes
16558                                     next if ( $tv > 2 );
16559                                 }
16560                             }
16561                         }
16562
16563                         unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
16564                             $forced_breakpoint_to_go[$iend_1] = 0;
16565                         }
16566                     }
16567
16568                     # for keywords..
16569                     elsif ( $type_iend_1 eq 'k' ) {
16570
16571                         # make major control keywords stand out
16572                         # (recombine.t)
16573                         next
16574                           if (
16575
16576                             #/^(last|next|redo|return)$/
16577                             $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
16578
16579                             # but only if followed by multiple lines
16580                             && $n < $nmax
16581                           );
16582
16583                         if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
16584                             next
16585                               unless $want_break_before{ $tokens_to_go[$iend_1]
16586                               };
16587                         }
16588                     }
16589
16590                     #----------------------------------------------------------
16591                     # Recombine Section 3:
16592                     # Examine token at $ibeg_2 (left end of second line of pair)
16593                     #----------------------------------------------------------
16594
16595                     # join lines identified above as capable of
16596                     # causing an outdented line with leading closing paren
16597                     # Note that we are skipping the rest of this section
16598                     # and the rest of the loop to do the join
16599                     if ($skip_Section_3) {
16600                         $forced_breakpoint_to_go[$iend_1] = 0;
16601                         $n_best = $n;
16602                         last;
16603                     }
16604
16605                     # handle lines with leading &&, ||
16606                     elsif ( $is_amp_amp{$type_ibeg_2} ) {
16607
16608                         $leading_amp_count++;
16609
16610                         # ok to recombine if it follows a ? or :
16611                         # and is followed by an open paren..
16612                         my $ok =
16613                           (      $is_ternary{$type_ibeg_1}
16614                               && $tokens_to_go[$iend_2] eq '(' )
16615
16616                     # or is followed by a ? or : at same depth
16617                     #
16618                     # We are looking for something like this. We can
16619                     # recombine the && line with the line above to make the
16620                     # structure more clear:
16621                     #  return
16622                     #    exists $G->{Attr}->{V}
16623                     #    && exists $G->{Attr}->{V}->{$u}
16624                     #    ? %{ $G->{Attr}->{V}->{$u} }
16625                     #    : ();
16626                     #
16627                     # We should probably leave something like this alone:
16628                     #  return
16629                     #       exists $G->{Attr}->{E}
16630                     #    && exists $G->{Attr}->{E}->{$u}
16631                     #    && exists $G->{Attr}->{E}->{$u}->{$v}
16632                     #    ? %{ $G->{Attr}->{E}->{$u}->{$v} }
16633                     #    : ();
16634                     # so that we either have all of the &&'s (or ||'s)
16635                     # on one line, as in the first example, or break at
16636                     # each one as in the second example.  However, it
16637                     # sometimes makes things worse to check for this because
16638                     # it prevents multiple recombinations.  So this is not done.
16639                           || ( $ibeg_3 >= 0
16640                             && $is_ternary{ $types_to_go[$ibeg_3] }
16641                             && $nesting_depth_to_go[$ibeg_3] ==
16642                             $nesting_depth_to_go[$ibeg_2] );
16643
16644                         # Combine a trailing && term with an || term: fix for
16645                         # c060 This is rare but can happen.
16646                         $ok ||= 1
16647                           if ( $ibeg_3 < 0
16648                             && $type_ibeg_2 eq '&&'
16649                             && $type_ibeg_1 eq '||'
16650                             && $nesting_depth_to_go[$ibeg_2] ==
16651                             $nesting_depth_to_go[$ibeg_1] );
16652
16653                         next if !$ok && $want_break_before{$type_ibeg_2};
16654                         $forced_breakpoint_to_go[$iend_1] = 0;
16655
16656                         # tweak the bond strength to give this joint priority
16657                         # over ? and :
16658                         $bs_tweak = 0.25;
16659                     }
16660
16661                     # Identify and recombine a broken ?/: chain
16662                     elsif ( $type_ibeg_2 eq '?' ) {
16663
16664                         # Do not recombine different levels
16665                         my $lev = $levels_to_go[$ibeg_2];
16666                         next if ( $lev ne $levels_to_go[$ibeg_1] );
16667
16668                         # Do not recombine a '?' if either next line or
16669                         # previous line does not start with a ':'.  The reasons
16670                         # are that (1) no alignment of the ? will be possible
16671                         # and (2) the expression is somewhat complex, so the
16672                         # '?' is harder to see in the interior of the line.
16673                         my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
16674                         my $precedes_colon =
16675                           $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
16676                         next unless ( $follows_colon || $precedes_colon );
16677
16678                         # we will always combining a ? line following a : line
16679                         if ( !$follows_colon ) {
16680
16681                             # ...otherwise recombine only if it looks like a
16682                             # chain.  we will just look at a few nearby lines
16683                             # to see if this looks like a chain.
16684                             my $local_count = 0;
16685                             foreach
16686                               my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 )
16687                             {
16688                                 $local_count++
16689                                   if $ii >= 0
16690                                   && $types_to_go[$ii] eq ':'
16691                                   && $levels_to_go[$ii] == $lev;
16692                             }
16693                             next unless ( $local_count > 1 );
16694                         }
16695                         $forced_breakpoint_to_go[$iend_1] = 0;
16696                     }
16697
16698                     # do not recombine lines with leading '.'
16699                     elsif ( $type_ibeg_2 eq '.' ) {
16700                         my $i_next_nonblank =
16701                           min( $inext_to_go[$ibeg_2], $iend_2 );
16702                         next
16703                           unless (
16704
16705                    # ... unless there is just one and we can reduce
16706                    # this to two lines if we do.  For example, this
16707                    #
16708                    #
16709                    #  $bodyA .=
16710                    #    '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
16711                    #
16712                    #  looks better than this:
16713                    #  $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
16714                    #    . '$args .= $pat;'
16715
16716                             (
16717                                    $n == 2
16718                                 && $n == $nmax
16719                                 && $type_ibeg_1 ne $type_ibeg_2
16720                             )
16721
16722                             # ... or this would strand a short quote , like this
16723                             #                . "some long quote"
16724                             #                . "\n";
16725
16726                             || (   $types_to_go[$i_next_nonblank] eq 'Q'
16727                                 && $i_next_nonblank >= $iend_2 - 1
16728                                 && $token_lengths_to_go[$i_next_nonblank] <
16729                                 $rOpts_short_concatenation_item_length )
16730                           );
16731                     }
16732
16733                     # handle leading keyword..
16734                     elsif ( $type_ibeg_2 eq 'k' ) {
16735
16736                         # handle leading "or"
16737                         if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
16738                             next
16739                               unless (
16740                                 $this_line_is_semicolon_terminated
16741                                 && (
16742                                     $type_ibeg_1 eq '}'
16743                                     || (
16744
16745                                         # following 'if' or 'unless' or 'or'
16746                                         $type_ibeg_1 eq 'k'
16747                                         && $is_if_unless{ $tokens_to_go[$ibeg_1]
16748                                         }
16749
16750                                         # important: only combine a very simple
16751                                         # or statement because the step below
16752                                         # may have combined a trailing 'and'
16753                                         # with this or, and we do not want to
16754                                         # then combine everything together
16755                                         && ( $iend_2 - $ibeg_2 <= 7 )
16756                                     )
16757                                 )
16758                               );
16759
16760                             #X: RT #81854
16761                             $forced_breakpoint_to_go[$iend_1] = 0
16762                               unless ( $old_breakpoint_to_go[$iend_1] );
16763                         }
16764
16765                         # handle leading 'and' and 'xor'
16766                         elsif ($tokens_to_go[$ibeg_2] eq 'and'
16767                             || $tokens_to_go[$ibeg_2] eq 'xor' )
16768                         {
16769
16770                             # Decide if we will combine a single terminal 'and'
16771                             # after an 'if' or 'unless'.
16772
16773                             #     This looks best with the 'and' on the same
16774                             #     line as the 'if':
16775                             #
16776                             #         $a = 1
16777                             #           if $seconds and $nu < 2;
16778                             #
16779                             #     But this looks better as shown:
16780                             #
16781                             #         $a = 1
16782                             #           if !$this->{Parents}{$_}
16783                             #           or $this->{Parents}{$_} eq $_;
16784                             #
16785                             next
16786                               unless (
16787                                 $this_line_is_semicolon_terminated
16788                                 && (
16789
16790                                     # following 'if' or 'unless' or 'or'
16791                                     $type_ibeg_1 eq 'k'
16792                                     && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
16793                                         || $tokens_to_go[$ibeg_1] eq 'or' )
16794                                 )
16795                               );
16796                         }
16797
16798                         # handle leading "if" and "unless"
16799                         elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
16800
16801                             # Combine something like:
16802                             #    next
16803                             #      if ( $lang !~ /${l}$/i );
16804                             # into:
16805                             #    next if ( $lang !~ /${l}$/i );
16806                             next
16807                               unless (
16808                                 $this_line_is_semicolon_terminated
16809
16810                                 #  previous line begins with 'and' or 'or'
16811                                 && $type_ibeg_1 eq 'k'
16812                                 && $is_and_or{ $tokens_to_go[$ibeg_1] }
16813
16814                               );
16815                         }
16816
16817                         # handle all other leading keywords
16818                         else {
16819
16820                             # keywords look best at start of lines,
16821                             # but combine things like "1 while"
16822                             unless ( $is_assignment{$type_iend_1} ) {
16823                                 next
16824                                   if ( ( $type_iend_1 ne 'k' )
16825                                     && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
16826                             }
16827                         }
16828                     }
16829
16830                     # similar treatment of && and || as above for 'and' and
16831                     # 'or': NOTE: This block of code is currently bypassed
16832                     # because of a previous block but is retained for possible
16833                     # future use.
16834                     elsif ( $is_amp_amp{$type_ibeg_2} ) {
16835
16836                         # maybe looking at something like:
16837                         # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
16838
16839                         next
16840                           unless (
16841                             $this_line_is_semicolon_terminated
16842
16843                             # previous line begins with an 'if' or 'unless'
16844                             # keyword
16845                             && $type_ibeg_1 eq 'k'
16846                             && $is_if_unless{ $tokens_to_go[$ibeg_1] }
16847
16848                           );
16849                     }
16850
16851                     # handle line with leading = or similar
16852                     elsif ( $is_assignment{$type_ibeg_2} ) {
16853                         next unless ( $n == 1 || $n == $nmax );
16854                         next if ( $old_breakpoint_to_go[$iend_1] );
16855                         next
16856                           unless (
16857
16858                             # unless we can reduce this to two lines
16859                             $nmax == 2
16860
16861                             # or three lines, the last with a leading semicolon
16862                             || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
16863
16864                             # or the next line ends with a here doc
16865                             || $type_iend_2 eq 'h'
16866
16867                             # or this is a short line ending in ;
16868                             || (   $n == $nmax
16869                                 && $this_line_is_semicolon_terminated )
16870                           );
16871                         $forced_breakpoint_to_go[$iend_1] = 0;
16872                     }
16873
16874                     #----------------------------------------------------------
16875                     # Recombine Section 4:
16876                     # Combine the lines if we arrive here and it is possible
16877                     #----------------------------------------------------------
16878
16879                     # honor hard breakpoints
16880                     next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
16881
16882                     my $bs = $rbond_strength_to_go->[$iend_1] + $bs_tweak;
16883
16884                  # Require a few extra spaces before recombining lines if we are
16885                  # at an old breakpoint unless this is a simple list or terminal
16886                  # line.  The goal is to avoid oscillating between two
16887                  # quasi-stable end states.  For example this snippet caused
16888                  # problems:
16889 ##    my $this =
16890 ##    bless {
16891 ##        TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
16892 ##      },
16893 ##      $type;
16894                     next
16895                       if ( $old_breakpoint_to_go[$iend_1]
16896                         && !$this_line_is_semicolon_terminated
16897                         && $n < $nmax
16898                         && $excess + 4 > 0
16899                         && $type_iend_2 ne ',' );
16900
16901                     # do not recombine if we would skip in indentation levels
16902                     if ( $n < $nmax ) {
16903                         my $if_next = $ri_beg->[ $n + 1 ];
16904                         next
16905                           if (
16906                                $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
16907                             && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
16908
16909                             # but an isolated 'if (' is undesirable
16910                             && !(
16911                                    $n == 1
16912                                 && $iend_1 - $ibeg_1 <= 2
16913                                 && $type_ibeg_1 eq 'k'
16914                                 && $tokens_to_go[$ibeg_1] eq 'if'
16915                                 && $tokens_to_go[$iend_1] ne '('
16916                             )
16917                           );
16918                     }
16919
16920                     # honor no-break's
16921                     ## next if ( $bs >= NO_BREAK - 1 );  # removed for b1257
16922
16923                     # remember the pair with the greatest bond strength
16924                     if ( !$n_best ) {
16925                         $n_best  = $n;
16926                         $bs_best = $bs;
16927                     }
16928                     else {
16929
16930                         if ( $bs > $bs_best ) {
16931                             $n_best  = $n;
16932                             $bs_best = $bs;
16933                         }
16934                     }
16935                 }
16936
16937                 # recombine the pair with the greatest bond strength
16938                 if ($n_best) {
16939                     splice @{$ri_beg}, $n_best,     1;
16940                     splice @{$ri_end}, $n_best - 1, 1;
16941                     splice @joint,     $n_best,     1;
16942
16943                     # keep going if we are still making progress
16944                     $more_to_do++;
16945                 }
16946             }    # end iteration loop
16947
16948         }    # end loop over sections
16949
16950       RETURN:
16951
16952         if (DEBUG_RECOMBINE) {
16953             my $nmax_last = @{$ri_end} - 1;
16954             print STDERR
16955 "exiting recombine with $nmax_last lines, starting lines=$nmax_start, iterations=$it_count, max_it=$it_count_max numsec=$num_sections\n";
16956         }
16957         return;
16958     } ## end sub recombine_breakpoints
16959 } ## end closure recombine_breakpoints
16960
16961 sub insert_final_ternary_breaks {
16962
16963     my ( $self, $ri_left, $ri_right ) = @_;
16964
16965     # Called once per batch to look for and do any final line breaks for
16966     # long ternary chains
16967
16968     my $nmax = @{$ri_right} - 1;
16969
16970     # scan the left and right end tokens of all lines
16971     my $count         = 0;
16972     my $i_first_colon = -1;
16973     for my $n ( 0 .. $nmax ) {
16974         my $il    = $ri_left->[$n];
16975         my $ir    = $ri_right->[$n];
16976         my $typel = $types_to_go[$il];
16977         my $typer = $types_to_go[$ir];
16978         return if ( $typel eq '?' );
16979         return if ( $typer eq '?' );
16980         if    ( $typel eq ':' ) { $i_first_colon = $il; last; }
16981         elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
16982     }
16983
16984     # For long ternary chains,
16985     # if the first : we see has its ? is in the interior
16986     # of a preceding line, then see if there are any good
16987     # breakpoints before the ?.
16988     if ( $i_first_colon > 0 ) {
16989         my $i_question = $mate_index_to_go[$i_first_colon];
16990         if ( $i_question > 0 ) {
16991             my @insert_list;
16992             foreach my $ii ( reverse( 0 .. $i_question - 1 ) ) {
16993                 my $token = $tokens_to_go[$ii];
16994                 my $type  = $types_to_go[$ii];
16995
16996                 # For now, a good break is either a comma or,
16997                 # in a long chain, a 'return'.
16998                 # Patch for RT #126633: added the $nmax>1 check to avoid
16999                 # breaking after a return for a simple ternary.  For longer
17000                 # chains the break after return allows vertical alignment, so
17001                 # it is still done.  So perltidy -wba='?' will not break
17002                 # immediately after the return in the following statement:
17003                 # sub x {
17004                 #    return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' :
17005                 #      'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb';
17006                 # }
17007                 if (
17008                     (
17009                            $type eq ','
17010                         || $type eq 'k' && ( $nmax > 1 && $token eq 'return' )
17011                     )
17012                     && $self->in_same_container_i( $ii, $i_question )
17013                   )
17014                 {
17015                     push @insert_list, $ii;
17016                     last;
17017                 }
17018             }
17019
17020             # insert any new break points
17021             if (@insert_list) {
17022                 $self->insert_additional_breaks( \@insert_list, $ri_left,
17023                     $ri_right );
17024             }
17025         }
17026     }
17027     return;
17028 } ## end sub insert_final_ternary_breaks
17029
17030 sub insert_breaks_before_list_opening_containers {
17031
17032     my ( $self, $ri_left, $ri_right ) = @_;
17033
17034     # This routine is called once per batch to implement the parameters
17035     # --break-before-hash-brace, etc.
17036
17037     # Nothing to do if none of these parameters has been set
17038     return unless %break_before_container_types;
17039
17040     my $nmax = @{$ri_right} - 1;
17041     return unless ( $nmax >= 0 );
17042
17043     my $rLL = $self->[_rLL_];
17044
17045     my $rbreak_before_container_by_seqno =
17046       $self->[_rbreak_before_container_by_seqno_];
17047     my $rK_weld_left = $self->[_rK_weld_left_];
17048
17049     # scan the ends of all lines
17050     my @insert_list;
17051     for my $n ( 0 .. $nmax ) {
17052         my $il = $ri_left->[$n];
17053         my $ir = $ri_right->[$n];
17054         next unless ( $ir > $il );
17055         my $Kl       = $K_to_go[$il];
17056         my $Kr       = $K_to_go[$ir];
17057         my $Kend     = $Kr;
17058         my $type_end = $rLL->[$Kr]->[_TYPE_];
17059
17060         # Backup before any side comment
17061         if ( $type_end eq '#' ) {
17062             $Kend = $self->K_previous_nonblank($Kr);
17063             next unless defined($Kend);
17064             $type_end = $rLL->[$Kend]->[_TYPE_];
17065         }
17066
17067         # Backup to the start of any weld; fix for b1173.
17068         if ($total_weld_count) {
17069             my $Kend_test = $rK_weld_left->{$Kend};
17070             if ( defined($Kend_test) && $Kend_test > $Kl ) {
17071                 $Kend      = $Kend_test;
17072                 $Kend_test = $rK_weld_left->{$Kend};
17073             }
17074
17075             # Do not break if we did not back up to the start of a weld
17076             # (shouldn't happen)
17077             next if ( defined($Kend_test) );
17078         }
17079
17080         my $token = $rLL->[$Kend]->[_TOKEN_];
17081         next unless ( $is_opening_token{$token} );
17082         next unless ( $Kl < $Kend - 1 );
17083
17084         my $seqno = $rLL->[$Kend]->[_TYPE_SEQUENCE_];
17085         next unless ( defined($seqno) );
17086
17087         # Use the flag which was previously set
17088         next unless ( $rbreak_before_container_by_seqno->{$seqno} );
17089
17090         # Install a break before this opening token.
17091         my $Kbreak = $self->K_previous_nonblank($Kend);
17092         my $ibreak = $Kbreak - $Kl + $il;
17093         next if ( $ibreak < $il );
17094         next if ( $nobreak_to_go[$ibreak] );
17095         push @insert_list, $ibreak;
17096     }
17097
17098     # insert any new break points
17099     if (@insert_list) {
17100         $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
17101     }
17102     return;
17103 } ## end sub insert_breaks_before_list_opening_containers
17104
17105 sub note_added_semicolon {
17106     my ( $self, $line_number ) = @_;
17107     $self->[_last_added_semicolon_at_] = $line_number;
17108     if ( $self->[_added_semicolon_count_] == 0 ) {
17109         $self->[_first_added_semicolon_at_] = $line_number;
17110     }
17111     $self->[_added_semicolon_count_]++;
17112     write_logfile_entry("Added ';' here\n");
17113     return;
17114 } ## end sub note_added_semicolon
17115
17116 sub note_deleted_semicolon {
17117     my ( $self, $line_number ) = @_;
17118     $self->[_last_deleted_semicolon_at_] = $line_number;
17119     if ( $self->[_deleted_semicolon_count_] == 0 ) {
17120         $self->[_first_deleted_semicolon_at_] = $line_number;
17121     }
17122     $self->[_deleted_semicolon_count_]++;
17123     write_logfile_entry("Deleted unnecessary ';' at line $line_number\n");
17124     return;
17125 } ## end sub note_deleted_semicolon
17126
17127 sub note_embedded_tab {
17128     my ( $self, $line_number ) = @_;
17129     $self->[_embedded_tab_count_]++;
17130     $self->[_last_embedded_tab_at_] = $line_number;
17131     if ( !$self->[_first_embedded_tab_at_] ) {
17132         $self->[_first_embedded_tab_at_] = $line_number;
17133     }
17134
17135     if ( $self->[_embedded_tab_count_] <= MAX_NAG_MESSAGES ) {
17136         write_logfile_entry("Embedded tabs in quote or pattern\n");
17137     }
17138     return;
17139 } ## end sub note_embedded_tab
17140
17141 use constant DEBUG_CORRECT_LP => 0;
17142
17143 sub correct_lp_indentation {
17144
17145     # When the -lp option is used, we need to make a last pass through
17146     # each line to correct the indentation positions in case they differ
17147     # from the predictions.  This is necessary because perltidy uses a
17148     # predictor/corrector method for aligning with opening parens.  The
17149     # predictor is usually good, but sometimes stumbles.  The corrector
17150     # tries to patch things up once the actual opening paren locations
17151     # are known.
17152     my ( $self, $ri_first, $ri_last ) = @_;
17153     my $K_opening_container = $self->[_K_opening_container_];
17154     my $K_closing_container = $self->[_K_closing_container_];
17155     my $do_not_pad          = 0;
17156
17157     #  Note on flag '$do_not_pad':
17158     #  We want to avoid a situation like this, where the aligner inserts
17159     #  whitespace before the '=' to align it with a previous '=', because
17160     #  otherwise the parens might become mis-aligned in a situation like
17161     #  this, where the '=' has become aligned with the previous line,
17162     #  pushing the opening '(' forward beyond where we want it.
17163     #
17164     #  $mkFloor::currentRoom = '';
17165     #  $mkFloor::c_entry     = $c->Entry(
17166     #                                 -width        => '10',
17167     #                                 -relief       => 'sunken',
17168     #                                 ...
17169     #                                 );
17170     #
17171     #  We leave it to the aligner to decide how to do this.
17172
17173     # first remove continuation indentation if appropriate
17174     my $rLL      = $self->[_rLL_];
17175     my $max_line = @{$ri_first} - 1;
17176
17177     #---------------------------------------------------------------------------
17178     # PASS 1: reduce indentation if necessary at any long one-line blocks (c098)
17179     #---------------------------------------------------------------------------
17180
17181     # The point is that sub 'starting_one_line_block' made one-line blocks based
17182     # on default indentation, not -lp indentation. So some of the one-line
17183     # blocks may be too long when given -lp indentation.  We will fix that now
17184     # if possible, using the list of these closing block indexes.
17185     my $ri_starting_one_line_block =
17186       $self->[_this_batch_]->[_ri_starting_one_line_block_];
17187     if ( @{$ri_starting_one_line_block} ) {
17188         my @ilist = @{$ri_starting_one_line_block};
17189         my $inext = shift(@ilist);
17190
17191         # loop over lines, checking length of each with a one-line block
17192         my ( $ibeg, $iend );
17193         foreach my $line ( 0 .. $max_line ) {
17194             $iend = $ri_last->[$line];
17195             next if ( $inext > $iend );
17196             $ibeg = $ri_first->[$line];
17197
17198             # This is just for lines with indentation objects (c098)
17199             my $excess =
17200               ref( $leading_spaces_to_go[$ibeg] )
17201               ? $self->excess_line_length( $ibeg, $iend )
17202               : 0;
17203
17204             if ( $excess > 0 ) {
17205                 my $available_spaces = $self->get_available_spaces_to_go($ibeg);
17206
17207                 if ( $available_spaces > 0 ) {
17208                     my $delete_want = min( $available_spaces, $excess );
17209                     my $deleted_spaces =
17210                       $self->reduce_lp_indentation( $ibeg, $delete_want );
17211                     $available_spaces =
17212                       $self->get_available_spaces_to_go($ibeg);
17213                 }
17214             }
17215
17216             # skip forward to next one-line block to check
17217             while (@ilist) {
17218                 $inext = shift @ilist;
17219                 next if ( $inext <= $iend );
17220                 last if ( $inext > $iend );
17221             }
17222             last if ( $inext <= $iend );
17223         }
17224     }
17225
17226     #-------------------------------------------------------------------
17227     # PASS 2: look for and fix other problems in each line of this batch
17228     #-------------------------------------------------------------------
17229
17230     # look at each output line ...
17231     my ( $ibeg, $iend );
17232     foreach my $line ( 0 .. $max_line ) {
17233         $ibeg = $ri_first->[$line];
17234         $iend = $ri_last->[$line];
17235
17236         # looking at each token in this output line ...
17237         foreach my $i ( $ibeg .. $iend ) {
17238
17239             # How many space characters to place before this token
17240             # for special alignment.  Actual padding is done in the
17241             # continue block.
17242
17243             # looking for next unvisited indentation item ...
17244             my $indentation = $leading_spaces_to_go[$i];
17245
17246             # This is just for indentation objects (c098)
17247             next unless ( ref($indentation) );
17248
17249             # Visit each indentation object just once
17250             next if ( $indentation->get_marked() );
17251
17252             # Mark first visit
17253             $indentation->set_marked(1);
17254
17255             # Skip indentation objects which do not align with container tokens
17256             my $align_seqno = $indentation->get_align_seqno();
17257             next unless ($align_seqno);
17258
17259             # Skip a container which is entirely on this line
17260             my $Ko = $K_opening_container->{$align_seqno};
17261             my $Kc = $K_closing_container->{$align_seqno};
17262             if ( defined($Ko) && defined($Kc) ) {
17263                 next if ( $Ko >= $K_to_go[$ibeg] && $Kc <= $K_to_go[$iend] );
17264             }
17265
17266             if ( $line == 1 && $i == $ibeg ) {
17267                 $do_not_pad = 1;
17268             }
17269
17270             #--------------------------------------------
17271             # Now see what the error is and try to fix it
17272             #--------------------------------------------
17273             my $closing_index = $indentation->get_closed();
17274             my $predicted_pos = $indentation->get_spaces();
17275
17276             # Find actual position:
17277             my $actual_pos;
17278
17279             if ( $i == $ibeg ) {
17280
17281                 # Case 1: token is first character of of batch - table lookup
17282                 if ( $line == 0 ) {
17283
17284                     $actual_pos = $predicted_pos;
17285
17286                     my ( $indent, $offset, $is_leading, $exists ) =
17287                       get_saved_opening_indentation($align_seqno);
17288                     if ( defined($indent) ) {
17289
17290                         # FIXME: should use '1' here if no space after opening
17291                         # and '2' if want space; hardwired at 1 like -gnu-style
17292                         $actual_pos = get_spaces($indent) + $offset + 1;
17293                     }
17294                 }
17295
17296                 # Case 2: token starts a new line - use length of previous line
17297                 else {
17298
17299                     my $ibegm = $ri_first->[ $line - 1 ];
17300                     my $iendm = $ri_last->[ $line - 1 ];
17301                     $actual_pos = total_line_length( $ibegm, $iendm );
17302
17303                     # follow -pt style
17304                     ++$actual_pos
17305                       if ( $types_to_go[ $iendm + 1 ] eq 'b' );
17306
17307                 }
17308             }
17309
17310             # Case 3: $i>$ibeg: token is mid-line - use length to previous token
17311             else {
17312
17313                 $actual_pos = total_line_length( $ibeg, $i - 1 );
17314
17315                 # for mid-line token, we must check to see if all
17316                 # additional lines have continuation indentation,
17317                 # and remove it if so.  Otherwise, we do not get
17318                 # good alignment.
17319                 if ( $closing_index > $iend ) {
17320                     my $ibeg_next = $ri_first->[ $line + 1 ];
17321                     if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
17322                         $self->undo_lp_ci( $line, $i, $closing_index,
17323                             $ri_first, $ri_last );
17324                     }
17325                 }
17326             }
17327
17328             # By how many spaces (plus or minus) would we need to increase the
17329             # indentation to get alignment with the opening token?
17330             my $move_right = $actual_pos - $predicted_pos;
17331
17332             if (DEBUG_CORRECT_LP) {
17333                 my $tok   = substr( $tokens_to_go[$i], 0, 8 );
17334                 my $avail = $self->get_available_spaces_to_go($ibeg);
17335                 print
17336 "CORRECT_LP for seq=$align_seqno, predicted pos=$predicted_pos actual=$actual_pos => move right=$move_right available=$avail i=$i max=$max_index_to_go tok=$tok\n";
17337             }
17338
17339             # nothing more to do if no error to correct (gnu2.t)
17340             if ( $move_right == 0 ) {
17341                 $indentation->set_recoverable_spaces($move_right);
17342                 next;
17343             }
17344
17345             # Get any collapsed length defined for -xlp
17346             my $collapsed_length =
17347               $self->[_rcollapsed_length_by_seqno_]->{$align_seqno};
17348             $collapsed_length = 0 unless ( defined($collapsed_length) );
17349
17350             if (DEBUG_CORRECT_LP) {
17351                 print
17352 "CORRECT_LP for seq=$align_seqno, collapsed length is $collapsed_length\n";
17353             }
17354
17355             # if we have not seen closure for this indentation in this batch,
17356             # and do not have a collapsed length estimate, we can only pass on
17357             # a request to the vertical aligner
17358             if ( $closing_index < 0 && !$collapsed_length ) {
17359                 $indentation->set_recoverable_spaces($move_right);
17360                 next;
17361             }
17362
17363             # If necessary, look ahead to see if there is really any leading
17364             # whitespace dependent on this whitespace, and also find the
17365             # longest line using this whitespace.  Since it is always safe to
17366             # move left if there are no dependents, we only need to do this if
17367             # we may have dependent nodes or need to move right.
17368
17369             my $have_child = $indentation->get_have_child();
17370             my %saw_indentation;
17371             my $line_count = 1;
17372             $saw_indentation{$indentation} = $indentation;
17373
17374             # How far can we move right before we hit the limit?
17375             # let $right_margen = the number of spaces that we can increase
17376             # the current indentation before hitting the maximum line length.
17377             my $right_margin = 0;
17378
17379             if ( $have_child || $move_right > 0 ) {
17380                 $have_child = 0;
17381
17382                 # include estimated collapsed length for incomplete containers
17383                 my $max_length = 0;
17384                 if ( $Kc > $K_to_go[$max_index_to_go] ) {
17385                     $max_length = $collapsed_length + $predicted_pos;
17386                 }
17387
17388                 if ( $i == $ibeg ) {
17389                     my $length = total_line_length( $ibeg, $iend );
17390                     if ( $length > $max_length ) { $max_length = $length }
17391                 }
17392
17393                 # look ahead at the rest of the lines of this batch..
17394                 foreach my $line_t ( $line + 1 .. $max_line ) {
17395                     my $ibeg_t = $ri_first->[$line_t];
17396                     my $iend_t = $ri_last->[$line_t];
17397                     last if ( $closing_index <= $ibeg_t );
17398
17399                     # remember all different indentation objects
17400                     my $indentation_t = $leading_spaces_to_go[$ibeg_t];
17401                     $saw_indentation{$indentation_t} = $indentation_t;
17402                     $line_count++;
17403
17404                     # remember longest line in the group
17405                     my $length_t = total_line_length( $ibeg_t, $iend_t );
17406                     if ( $length_t > $max_length ) {
17407                         $max_length = $length_t;
17408                     }
17409                 }
17410
17411                 $right_margin =
17412                   $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] -
17413                   $max_length;
17414                 if ( $right_margin < 0 ) { $right_margin = 0 }
17415             }
17416
17417             my $first_line_comma_count =
17418               grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
17419             my $comma_count = $indentation->get_comma_count();
17420             my $arrow_count = $indentation->get_arrow_count();
17421
17422             # This is a simple approximate test for vertical alignment:
17423             # if we broke just after an opening paren, brace, bracket,
17424             # and there are 2 or more commas in the first line,
17425             # and there are no '=>'s,
17426             # then we are probably vertically aligned.  We could set
17427             # an exact flag in sub break_lists, but this is good
17428             # enough.
17429             my $indentation_count = keys %saw_indentation;
17430             my $is_vertically_aligned =
17431               (      $i == $ibeg
17432                   && $first_line_comma_count > 1
17433                   && $indentation_count == 1
17434                   && ( $arrow_count == 0 || $arrow_count == $line_count ) );
17435
17436             # Make the move if possible ..
17437             if (
17438
17439                 # we can always move left
17440                 $move_right < 0
17441
17442                 # -xlp
17443
17444                 # incomplete container
17445                 || (   $rOpts_extended_line_up_parentheses
17446                     && $Kc > $K_to_go[$max_index_to_go] )
17447                 || $closing_index < 0
17448
17449                 # but we should only move right if we are sure it will
17450                 # not spoil vertical alignment
17451                 || ( $comma_count == 0 )
17452                 || ( $comma_count > 0 && !$is_vertically_aligned )
17453               )
17454             {
17455                 my $move =
17456                   ( $move_right <= $right_margin )
17457                   ? $move_right
17458                   : $right_margin;
17459
17460                 if (DEBUG_CORRECT_LP) {
17461                     print
17462                       "CORRECT_LP for seq=$align_seqno, moving $move spaces\n";
17463                 }
17464
17465                 foreach ( keys %saw_indentation ) {
17466                     $saw_indentation{$_}
17467                       ->permanently_decrease_available_spaces( -$move );
17468                 }
17469             }
17470
17471             # Otherwise, record what we want and the vertical aligner
17472             # will try to recover it.
17473             else {
17474                 $indentation->set_recoverable_spaces($move_right);
17475             }
17476         } ## end loop over tokens in a line
17477     } ## end loop over lines
17478     return $do_not_pad;
17479 } ## end sub correct_lp_indentation
17480
17481 sub undo_lp_ci {
17482
17483     # If there is a single, long parameter within parens, like this:
17484     #
17485     #  $self->command( "/msg "
17486     #        . $infoline->chan
17487     #        . " You said $1, but did you know that it's square was "
17488     #        . $1 * $1 . " ?" );
17489     #
17490     # we can remove the continuation indentation of the 2nd and higher lines
17491     # to achieve this effect, which is more pleasing:
17492     #
17493     #  $self->command("/msg "
17494     #                 . $infoline->chan
17495     #                 . " You said $1, but did you know that it's square was "
17496     #                 . $1 * $1 . " ?");
17497
17498     my ( $self, $line_open, $i_start, $closing_index, $ri_first, $ri_last ) =
17499       @_;
17500     my $max_line = @{$ri_first} - 1;
17501
17502     # must be multiple lines
17503     return unless $max_line > $line_open;
17504
17505     my $lev_start     = $levels_to_go[$i_start];
17506     my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
17507
17508     # see if all additional lines in this container have continuation
17509     # indentation
17510     my $line_1 = 1 + $line_open;
17511     my $n      = $line_open;
17512
17513     while ( ++$n <= $max_line ) {
17514         my $ibeg = $ri_first->[$n];
17515         my $iend = $ri_last->[$n];
17516         if ( $ibeg eq $closing_index ) { $n--; last }
17517         return if ( $lev_start != $levels_to_go[$ibeg] );
17518         return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
17519         last   if ( $closing_index <= $iend );
17520     }
17521
17522     # we can reduce the indentation of all continuation lines
17523     my $continuation_line_count = $n - $line_open;
17524     @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
17525       (0) x ($continuation_line_count);
17526     @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
17527       @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
17528     return;
17529 } ## end sub undo_lp_ci
17530
17531 ###############################################
17532 # CODE SECTION 10: Code to break long statments
17533 ###############################################
17534
17535 sub break_long_lines {
17536
17537     #-----------------------------------------------------------
17538     # Break a batch of tokens into lines which do not exceed the
17539     # maximum line length.
17540     #-----------------------------------------------------------
17541
17542     # Define an array of indexes for inserting newline characters to
17543     # keep the line lengths below the maximum desired length.  There is
17544     # an implied break after the last token, so it need not be included.
17545
17546     # Method:
17547     # This routine is part of series of routines which adjust line
17548     # lengths.  It is only called if a statement is longer than the
17549     # maximum line length, or if a preliminary scanning located
17550     # desirable break points.   Sub break_lists has already looked at
17551     # these tokens and set breakpoints (in array
17552     # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
17553     # after commas, after opening parens, and before closing parens).
17554     # This routine will honor these breakpoints and also add additional
17555     # breakpoints as necessary to keep the line length below the maximum
17556     # requested.  It bases its decision on where the 'bond strength' is
17557     # lowest.
17558
17559     # Output: returns references to the arrays:
17560     #  @i_first
17561     #  @i_last
17562     # which contain the indexes $i of the first and last tokens on each
17563     # line.
17564
17565     # In addition, the array:
17566     #   $forced_breakpoint_to_go[$i]
17567     # may be updated to be =1 for any index $i after which there must be
17568     # a break.  This signals later routines not to undo the breakpoint.
17569
17570     my ( $self, $saw_good_break, $rcolon_list, $rbond_strength_bias ) = @_;
17571
17572     # @{$rcolon_list} is a list of all the ? and : tokens in the batch, in
17573     # order.
17574
17575     use constant DEBUG_BREAK_LINES => 0;
17576
17577     my @i_first        = ();    # the first index to output
17578     my @i_last         = ();    # the last index to output
17579     my @i_colon_breaks = ();    # needed to decide if we have to break at ?'s
17580     if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
17581
17582     my $rbond_strength_to_go = $self->set_bond_strengths();
17583
17584     # Add any comma bias set by break_lists
17585     if ( @{$rbond_strength_bias} ) {
17586         foreach my $item ( @{$rbond_strength_bias} ) {
17587             my ( $ii, $bias ) = @{$item};
17588             if ( $ii >= 0 && $ii <= $max_index_to_go ) {
17589                 $rbond_strength_to_go->[$ii] += $bias;
17590             }
17591             elsif (DEVEL_MODE) {
17592                 my $KK  = $K_to_go[0];
17593                 my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_];
17594                 Fault(
17595 "Bad bond strength bias near line $lno: i=$ii must be between 0 and $max_index_to_go\n"
17596                 );
17597             }
17598         }
17599     }
17600
17601     my $imin = 0;
17602     my $imax = $max_index_to_go;
17603     if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
17604     if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
17605     my $i_begin = $imin;    # index for starting next iteration
17606
17607     my $leading_spaces          = leading_spaces_to_go($imin);
17608     my $line_count              = 0;
17609     my $last_break_strength     = NO_BREAK;
17610     my $i_last_break            = -1;
17611     my $max_bias                = 0.001;
17612     my $tiny_bias               = 0.0001;
17613     my $leading_alignment_token = EMPTY_STRING;
17614     my $leading_alignment_type  = EMPTY_STRING;
17615
17616     # see if any ?/:'s are in order
17617     my $colons_in_order = 1;
17618     my $last_tok        = EMPTY_STRING;
17619     foreach ( @{$rcolon_list} ) {
17620         if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
17621         $last_tok = $_;
17622     }
17623
17624     # This is a sufficient but not necessary condition for colon chain
17625     my $is_colon_chain = ( $colons_in_order && @{$rcolon_list} > 2 );
17626
17627     my $Msg = EMPTY_STRING;
17628
17629     #-------------------------------------------------------
17630     # BEGINNING of main loop to set continuation breakpoints
17631     # Keep iterating until we reach the end
17632     #-------------------------------------------------------
17633     while ( $i_begin <= $imax ) {
17634         my $lowest_strength        = NO_BREAK;
17635         my $starting_sum           = $summed_lengths_to_go[$i_begin];
17636         my $i_lowest               = -1;
17637         my $i_test                 = -1;
17638         my $lowest_next_token      = EMPTY_STRING;
17639         my $lowest_next_type       = 'b';
17640         my $i_lowest_next_nonblank = -1;
17641         my $maximum_line_length =
17642           $maximum_line_length_at_level[ $levels_to_go[$i_begin] ];
17643
17644         # Do not separate an isolated bare word from an opening paren.
17645         # Alternate Fix #2 for issue b1299.  This waits as long as possible
17646         # to make the decision.
17647         if ( $types_to_go[$i_begin] eq 'i'
17648             && substr( $tokens_to_go[$i_begin], 0, 1 ) =~ /\w/ )
17649         {
17650             my $i_next_nonblank = $inext_to_go[$i_begin];
17651             if ( $tokens_to_go[$i_next_nonblank] eq '(' ) {
17652                 $rbond_strength_to_go->[$i_begin] = NO_BREAK;
17653             }
17654         }
17655
17656         #-------------------------------------------------------
17657         # BEGINNING of inner loop to find the best next breakpoint
17658         #-------------------------------------------------------
17659         my $strength = NO_BREAK;
17660         $i_test = $i_begin - 1;
17661         while ( ++$i_test <= $imax ) {
17662             my $type                     = $types_to_go[$i_test];
17663             my $token                    = $tokens_to_go[$i_test];
17664             my $next_type                = $types_to_go[ $i_test + 1 ];
17665             my $next_token               = $tokens_to_go[ $i_test + 1 ];
17666             my $i_next_nonblank          = $inext_to_go[$i_test];
17667             my $next_nonblank_type       = $types_to_go[$i_next_nonblank];
17668             my $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
17669             my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
17670
17671             # adjustments to the previous bond strength may have been made, and
17672             # we must keep the bond strength of a token and its following blank
17673             # the same;
17674             my $last_strength = $strength;
17675             $strength = $rbond_strength_to_go->[$i_test];
17676             if ( $type eq 'b' ) { $strength = $last_strength }
17677
17678             # reduce strength a bit to break ties at an old comma breakpoint ...
17679             if (
17680
17681                 $old_breakpoint_to_go[$i_test]
17682
17683                 # Patch: limited to just commas to avoid blinking states
17684                 && $type eq ','
17685
17686                 # which is a 'good' breakpoint, meaning ...
17687                 # we don't want to break before it
17688                 && !$want_break_before{$type}
17689
17690                 # and either we want to break before the next token
17691                 # or the next token is not short (i.e. not a '*', '/' etc.)
17692                 && $i_next_nonblank <= $imax
17693                 && (   $want_break_before{$next_nonblank_type}
17694                     || $token_lengths_to_go[$i_next_nonblank] > 2
17695                     || $next_nonblank_type eq ','
17696                     || $is_opening_type{$next_nonblank_type} )
17697               )
17698             {
17699                 $strength -= $tiny_bias;
17700                 DEBUG_BREAK_LINES && do { $Msg .= " :-bias at i=$i_test" };
17701             }
17702
17703             # otherwise increase strength a bit if this token would be at the
17704             # maximum line length.  This is necessary to avoid blinking
17705             # in the above example when the -iob flag is added.
17706             else {
17707                 my $len =
17708                   $leading_spaces +
17709                   $summed_lengths_to_go[ $i_test + 1 ] -
17710                   $starting_sum;
17711                 if ( $len >= $maximum_line_length ) {
17712                     $strength += $tiny_bias;
17713                     DEBUG_BREAK_LINES && do { $Msg .= " :+bias at i=$i_test" };
17714                 }
17715             }
17716
17717             my $must_break = 0;
17718
17719             # Force an immediate break at certain operators
17720             # with lower level than the start of the line,
17721             # unless we've already seen a better break.
17722             #
17723             #------------------------------------
17724             # Note on an issue with a preceding ?
17725             #------------------------------------
17726             # We don't include a ? in the above list, but there may
17727             # be a break at a previous ? if the line is long.
17728             # Because of this we do not want to force a break if
17729             # there is a previous ? on this line.  For now the best way
17730             # to do this is to not break if we have seen a lower strength
17731             # point, which is probably a ?.
17732             #
17733             # Example of unwanted breaks we are avoiding at a '.' following a ?
17734             # from pod2html using perltidy -gnu:
17735             # )
17736             # ? "\n&lt;A NAME=\""
17737             # . $value
17738             # . "\"&gt;\n$text&lt;/A&gt;\n"
17739             # : "\n$type$pod2.html\#" . $value . "\"&gt;$text&lt;\/A&gt;\n";
17740             if (
17741                 ( $strength <= $lowest_strength )
17742                 && ( $nesting_depth_to_go[$i_begin] >
17743                     $nesting_depth_to_go[$i_next_nonblank] )
17744                 && (
17745                     $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
17746                     || (
17747                         $next_nonblank_type eq 'k'
17748
17749                         ##  /^(and|or)$/  # note: includes 'xor' now
17750                         && $is_and_or{$next_nonblank_token}
17751                     )
17752                 )
17753               )
17754             {
17755                 $self->set_forced_breakpoint($i_next_nonblank);
17756                 DEBUG_BREAK_LINES
17757                   && do { $Msg .= " :Forced break at i=$i_next_nonblank" };
17758             }
17759
17760             if (
17761
17762                 # Try to put a break where requested by break_lists
17763                 $forced_breakpoint_to_go[$i_test]
17764
17765                 # break between ) { in a continued line so that the '{' can
17766                 # be outdented
17767                 # See similar logic in break_lists which catches instances
17768                 # where a line is just something like ') {'.  We have to
17769                 # be careful because the corresponding block keyword might
17770                 # not be on the first line, such as 'for' here:
17771                 #
17772                 # eval {
17773                 #     for ("a") {
17774                 #         for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
17775                 #     }
17776                 # };
17777                 #
17778                 || (
17779                        $line_count
17780                     && ( $token eq ')' )
17781                     && ( $next_nonblank_type eq '{' )
17782                     && ($next_nonblank_block_type)
17783                     && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
17784
17785                     # RT #104427: Dont break before opening sub brace because
17786                     # sub block breaks handled at higher level, unless
17787                     # it looks like the preceding list is long and broken
17788                     && !(
17789
17790                         (
17791                                $next_nonblank_block_type =~ /$SUB_PATTERN/
17792                             || $next_nonblank_block_type =~ /$ASUB_PATTERN/
17793                         )
17794                         && ( $nesting_depth_to_go[$i_begin] ==
17795                             $nesting_depth_to_go[$i_next_nonblank] )
17796                     )
17797
17798                     && !$rOpts_opening_brace_always_on_right
17799                 )
17800
17801                 # There is an implied forced break at a terminal opening brace
17802                 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
17803               )
17804             {
17805
17806                 # Forced breakpoints must sometimes be overridden, for example
17807                 # because of a side comment causing a NO_BREAK.  It is easier
17808                 # to catch this here than when they are set.
17809                 if ( $strength < NO_BREAK - 1 ) {
17810                     $strength   = $lowest_strength - $tiny_bias;
17811                     $must_break = 1;
17812                     DEBUG_BREAK_LINES
17813                       && do { $Msg .= " :set must_break at i=$i_next_nonblank" };
17814                 }
17815             }
17816
17817             # quit if a break here would put a good terminal token on
17818             # the next line and we already have a possible break
17819             if (
17820                    !$must_break
17821                 && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
17822                 && (
17823                     (
17824                         $leading_spaces +
17825                         $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
17826                         $starting_sum
17827                     ) > $maximum_line_length
17828                 )
17829               )
17830             {
17831                 if ( $i_lowest >= 0 ) {
17832                     DEBUG_BREAK_LINES && do {
17833                         $Msg .= " :quit at good terminal='$next_nonblank_type'";
17834                     };
17835                     last;
17836                 }
17837             }
17838
17839             # Avoid a break which would strand a single punctuation
17840             # token.  For example, we do not want to strand a leading
17841             # '.' which is followed by a long quoted string.
17842             # But note that we do want to do this with -extrude (l=1)
17843             # so please test any changes to this code on -extrude.
17844             if (
17845                    !$must_break
17846                 && ( $i_test == $i_begin )
17847                 && ( $i_test < $imax )
17848                 && ( $token eq $type )
17849                 && (
17850                     (
17851                         $leading_spaces +
17852                         $summed_lengths_to_go[ $i_test + 1 ] -
17853                         $starting_sum
17854                     ) < $maximum_line_length
17855                 )
17856               )
17857             {
17858                 $i_test = min( $imax, $inext_to_go[$i_test] );
17859                 DEBUG_BREAK_LINES && do {
17860                     $Msg .= " :redo at i=$i_test";
17861                 };
17862                 redo;
17863             }
17864
17865             if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
17866             {
17867
17868                 # break at previous best break if it would have produced
17869                 # a leading alignment of certain common tokens, and it
17870                 # is different from the latest candidate break
17871                 if ($leading_alignment_type) {
17872                     DEBUG_BREAK_LINES && do {
17873                         $Msg .=
17874 " :last at leading_alignment='$leading_alignment_type'";
17875                     };
17876                     last;
17877                 }
17878
17879                 # Force at least one breakpoint if old code had good
17880                 # break It is only called if a breakpoint is required or
17881                 # desired.  This will probably need some adjustments
17882                 # over time.  A goal is to try to be sure that, if a new
17883                 # side comment is introduced into formatted text, then
17884                 # the same breakpoints will occur.  scbreak.t
17885                 if (
17886                     $i_test == $imax            # we are at the end
17887                     && !$forced_breakpoint_count
17888                     && $saw_good_break          # old line had good break
17889                     && $type =~ /^[#;\{]$/      # and this line ends in
17890                                                 # ';' or side comment
17891                     && $i_last_break < 0        # and we haven't made a break
17892                     && $i_lowest >= 0           # and we saw a possible break
17893                     && $i_lowest < $imax - 1    # (but not just before this ;)
17894                     && $strength - $lowest_strength < 0.5 * WEAK # and it's good
17895                   )
17896                 {
17897
17898                     DEBUG_BREAK_LINES && do {
17899                         $Msg .= " :last at good old break\n";
17900                     };
17901                     last;
17902                 }
17903
17904                 # Do not skip past an important break point in a short final
17905                 # segment.  For example, without this check we would miss the
17906                 # break at the final / in the following code:
17907                 #
17908                 #  $depth_stop =
17909                 #    ( $tau * $mass_pellet * $q_0 *
17910                 #        ( 1. - exp( -$t_stop / $tau ) ) -
17911                 #        4. * $pi * $factor * $k_ice *
17912                 #        ( $t_melt - $t_ice ) *
17913                 #        $r_pellet *
17914                 #        $t_stop ) /
17915                 #    ( $rho_ice * $Qs * $pi * $r_pellet**2 );
17916                 #
17917                 if (
17918                        $line_count > 2
17919                     && $i_lowest >= 0    # and we saw a possible break
17920                     && $i_lowest < $i_test
17921                     && $i_test > $imax - 2
17922                     && $nesting_depth_to_go[$i_begin] >
17923                     $nesting_depth_to_go[$i_lowest]
17924                     && $lowest_strength < $last_break_strength - .5 * WEAK
17925                   )
17926                 {
17927                     # Make this break for math operators for now
17928                     my $ir = $inext_to_go[$i_lowest];
17929                     my $il = $iprev_to_go[$ir];
17930                     if (   $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
17931                         || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ )
17932                     {
17933                         DEBUG_BREAK_LINES && do {
17934                             $Msg .= " :last-noskip_short";
17935                         };
17936                         last;
17937                     }
17938                 }
17939
17940                 # Update the minimum bond strength location
17941                 $lowest_strength        = $strength;
17942                 $i_lowest               = $i_test;
17943                 $lowest_next_token      = $next_nonblank_token;
17944                 $lowest_next_type       = $next_nonblank_type;
17945                 $i_lowest_next_nonblank = $i_next_nonblank;
17946                 if ($must_break) {
17947                     DEBUG_BREAK_LINES && do {
17948                         $Msg .= " :last-must_break";
17949                     };
17950                     last;
17951                 }
17952
17953                 # set flags to remember if a break here will produce a
17954                 # leading alignment of certain common tokens
17955                 if (   $line_count > 0
17956                     && $i_test < $imax
17957                     && ( $lowest_strength - $last_break_strength <= $max_bias )
17958                   )
17959                 {
17960                     my $i_last_end = $iprev_to_go[$i_begin];
17961                     my $tok_beg    = $tokens_to_go[$i_begin];
17962                     my $type_beg   = $types_to_go[$i_begin];
17963                     if (
17964
17965                         # check for leading alignment of certain tokens
17966                         (
17967                                $tok_beg eq $next_nonblank_token
17968                             && $is_chain_operator{$tok_beg}
17969                             && (   $type_beg eq 'k'
17970                                 || $type_beg eq $tok_beg )
17971                             && $nesting_depth_to_go[$i_begin] >=
17972                             $nesting_depth_to_go[$i_next_nonblank]
17973                         )
17974
17975                         || (   $tokens_to_go[$i_last_end] eq $token
17976                             && $is_chain_operator{$token}
17977                             && ( $type eq 'k' || $type eq $token )
17978                             && $nesting_depth_to_go[$i_last_end] >=
17979                             $nesting_depth_to_go[$i_test] )
17980                       )
17981                     {
17982                         $leading_alignment_token = $next_nonblank_token;
17983                         $leading_alignment_type  = $next_nonblank_type;
17984                     }
17985                 }
17986             }
17987
17988             my $too_long = ( $i_test >= $imax );
17989             if ( !$too_long ) {
17990                 my $next_length =
17991                   $leading_spaces +
17992                   $summed_lengths_to_go[ $i_test + 2 ] -
17993                   $starting_sum;
17994                 $too_long = $next_length > $maximum_line_length;
17995
17996                 # To prevent blinkers we will avoid leaving a token exactly at
17997                 # the line length limit unless it is the last token or one of
17998                 # several "good" types.
17999                 #
18000                 # The following code was a blinker with -pbp before this
18001                 # modification:
18002 ##                    $last_nonblank_token eq '('
18003 ##                        && $is_indirect_object_taker{ $paren_type
18004 ##                            [$paren_depth] }
18005                 # The issue causing the problem is that if the
18006                 # term [$paren_depth] gets broken across a line then
18007                 # the whitespace routine doesn't see both opening and closing
18008                 # brackets and will format like '[ $paren_depth ]'.  This
18009                 # leads to an oscillation in length depending if we break
18010                 # before the closing bracket or not.
18011                 if (  !$too_long
18012                     && $i_test + 1 < $imax
18013                     && $next_nonblank_type ne ','
18014                     && !$is_closing_type{$next_nonblank_type} )
18015                 {
18016                     $too_long = $next_length >= $maximum_line_length;
18017                     DEBUG_BREAK_LINES && do {
18018                         $Msg .= " :too_long=$too_long" if ($too_long);
18019                     }
18020                 }
18021             }
18022
18023             DEBUG_BREAK_LINES && do {
18024                 my $ltok = $token;
18025                 my $rtok =
18026                   $next_nonblank_token ? $next_nonblank_token : EMPTY_STRING;
18027                 my $i_testp2 = $i_test + 2;
18028                 if ( $i_testp2 > $max_index_to_go + 1 ) {
18029                     $i_testp2 = $max_index_to_go + 1;
18030                 }
18031                 if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
18032                 if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
18033                 print STDOUT
18034 "BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength    $ltok $rtok\n";
18035             };
18036
18037             # allow one extra terminal token after exceeding line length
18038             # if it would strand this token.
18039             if (   $rOpts_fuzzy_line_length
18040                 && $too_long
18041                 && $i_lowest == $i_test
18042                 && $token_lengths_to_go[$i_test] > 1
18043                 && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
18044               )
18045             {
18046                 $too_long = 0;
18047                 DEBUG_BREAK_LINES && do {
18048                     $Msg .= " :do_not_strand next='$next_nonblank_type'";
18049                 };
18050             }
18051
18052             # we are done if...
18053             if (
18054
18055                 # ... no more space and we have a break
18056                 $too_long && $i_lowest >= 0
18057
18058                 # ... or no more tokens
18059                 || $i_test == $imax
18060               )
18061             {
18062                 DEBUG_BREAK_LINES && do {
18063                     $Msg .=
18064 " :Done-too_long=$too_long or i_lowest=$i_lowest or $i_test==imax";
18065                 };
18066                 last;
18067             }
18068         }
18069
18070         #-------------------------------------------------------
18071         # END of inner loop to find the best next breakpoint
18072         # Now decide exactly where to put the breakpoint
18073         #-------------------------------------------------------
18074
18075         # it's always ok to break at imax if no other break was found
18076         if ( $i_lowest < 0 ) { $i_lowest = $imax }
18077
18078         # semi-final index calculation
18079         my $i_next_nonblank     = $inext_to_go[$i_lowest];
18080         my $next_nonblank_type  = $types_to_go[$i_next_nonblank];
18081         my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
18082
18083         #-------------------------------------------------------
18084         # ?/: rule 1 : if a break here will separate a '?' on this
18085         # line from its closing ':', then break at the '?' instead.
18086         #-------------------------------------------------------
18087         foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
18088             next unless ( $tokens_to_go[$i] eq '?' );
18089
18090             # do not break if probable sequence of ?/: statements
18091             next if ($is_colon_chain);
18092
18093             # do not break if statement is broken by side comment
18094             next
18095               if ( $tokens_to_go[$max_index_to_go] eq '#'
18096                 && terminal_type_i( 0, $max_index_to_go ) !~ /^[\;\}]$/ );
18097
18098             # no break needed if matching : is also on the line
18099             next
18100               if ( $mate_index_to_go[$i] >= 0
18101                 && $mate_index_to_go[$i] <= $i_next_nonblank );
18102
18103             $i_lowest = $i;
18104             if ( $want_break_before{'?'} ) { $i_lowest-- }
18105             last;
18106         }
18107
18108         #-------------------------------------------------------
18109         # END of inner loop to find the best next breakpoint:
18110         # Break the line after the token with index i=$i_lowest
18111         #-------------------------------------------------------
18112
18113         # final index calculation
18114         $i_next_nonblank     = $inext_to_go[$i_lowest];
18115         $next_nonblank_type  = $types_to_go[$i_next_nonblank];
18116         $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
18117
18118         DEBUG_BREAK_LINES
18119           && print STDOUT
18120 "BREAK: best is i = $i_lowest strength = $lowest_strength;\nReason>> $Msg\n";
18121         $Msg = EMPTY_STRING;
18122
18123         #-------------------------------------------------------
18124         # ?/: rule 2 : if we break at a '?', then break at its ':'
18125         #
18126         # Note: this rule is also in sub break_lists to handle a break
18127         # at the start and end of a line (in case breaks are dictated
18128         # by side comments).
18129         #-------------------------------------------------------
18130         if ( $next_nonblank_type eq '?' ) {
18131             $self->set_closing_breakpoint($i_next_nonblank);
18132         }
18133         elsif ( $types_to_go[$i_lowest] eq '?' ) {
18134             $self->set_closing_breakpoint($i_lowest);
18135         }
18136
18137         #-------------------------------------------------------
18138         # ?/: rule 3 : if we break at a ':' then we save
18139         # its location for further work below.  We may need to go
18140         # back and break at its '?'.
18141         #-------------------------------------------------------
18142         if ( $next_nonblank_type eq ':' ) {
18143             push @i_colon_breaks, $i_next_nonblank;
18144         }
18145         elsif ( $types_to_go[$i_lowest] eq ':' ) {
18146             push @i_colon_breaks, $i_lowest;
18147         }
18148
18149         # here we should set breaks for all '?'/':' pairs which are
18150         # separated by this line
18151
18152         $line_count++;
18153
18154         # save this line segment, after trimming blanks at the ends
18155         push( @i_first,
18156             ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
18157         push( @i_last,
18158             ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
18159
18160         # set a forced breakpoint at a container opening, if necessary, to
18161         # signal a break at a closing container.  Excepting '(' for now.
18162         if (
18163             (
18164                    $tokens_to_go[$i_lowest] eq '{'
18165                 || $tokens_to_go[$i_lowest] eq '['
18166             )
18167             && !$forced_breakpoint_to_go[$i_lowest]
18168           )
18169         {
18170             $self->set_closing_breakpoint($i_lowest);
18171         }
18172
18173         # get ready to go again
18174         $i_begin                 = $i_lowest + 1;
18175         $last_break_strength     = $lowest_strength;
18176         $i_last_break            = $i_lowest;
18177         $leading_alignment_token = EMPTY_STRING;
18178         $leading_alignment_type  = EMPTY_STRING;
18179         $lowest_next_token       = EMPTY_STRING;
18180         $lowest_next_type        = 'b';
18181
18182         if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
18183             $i_begin++;
18184         }
18185
18186         # update indentation size
18187         if ( $i_begin <= $imax ) {
18188             $leading_spaces = leading_spaces_to_go($i_begin);
18189             DEBUG_BREAK_LINES
18190               && print STDOUT
18191               "updating leading spaces to be $leading_spaces at i=$i_begin\n";
18192         }
18193     }
18194
18195     #-------------------------------------------------------
18196     # END of main loop to set continuation breakpoints
18197     # Now go back and make any necessary corrections
18198     #-------------------------------------------------------
18199
18200     #-------------------------------------------------------
18201     # ?/: rule 4 -- if we broke at a ':', then break at
18202     # corresponding '?' unless this is a chain of ?: expressions
18203     #-------------------------------------------------------
18204     if (@i_colon_breaks) {
18205
18206         # using a simple method for deciding if we are in a ?/: chain --
18207         # this is a chain if it has multiple ?/: pairs all in order;
18208         # otherwise not.
18209         # Note that if line starts in a ':' we count that above as a break
18210         my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
18211
18212         unless ($is_chain) {
18213             my @insert_list = ();
18214             foreach (@i_colon_breaks) {
18215                 my $i_question = $mate_index_to_go[$_];
18216                 if ( $i_question >= 0 ) {
18217                     if ( $want_break_before{'?'} ) {
18218                         $i_question = $iprev_to_go[$i_question];
18219                     }
18220
18221                     if ( $i_question >= 0 ) {
18222                         push @insert_list, $i_question;
18223                     }
18224                 }
18225                 $self->insert_additional_breaks( \@insert_list, \@i_first,
18226                     \@i_last );
18227             }
18228         }
18229     }
18230     return ( \@i_first, \@i_last, $rbond_strength_to_go );
18231 } ## end sub break_long_lines
18232
18233 ###########################################
18234 # CODE SECTION 11: Code to break long lists
18235 ###########################################
18236
18237 {    ## begin closure break_lists
18238
18239     # These routines and variables are involved in finding good
18240     # places to break long lists.
18241
18242     use constant DEBUG_BREAK_LISTS => 0;
18243
18244     my (
18245         $block_type,                $current_depth,
18246         $depth,                     $i,
18247         $i_last_nonblank_token,     $last_nonblank_token,
18248         $last_nonblank_type,        $last_nonblank_block_type,
18249         $last_old_breakpoint_count, $minimum_depth,
18250         $next_nonblank_block_type,  $next_nonblank_token,
18251         $next_nonblank_type,        $old_breakpoint_count,
18252         $starting_breakpoint_count, $starting_depth,
18253         $token,                     $type,
18254         $type_sequence,
18255     );
18256
18257     my (
18258         @breakpoint_stack,              @breakpoint_undo_stack,
18259         @comma_index,                   @container_type,
18260         @identifier_count_stack,        @index_before_arrow,
18261         @interrupted_list,              @item_count_stack,
18262         @last_comma_index,              @last_dot_index,
18263         @last_nonblank_type,            @old_breakpoint_count_stack,
18264         @opening_structure_index_stack, @rfor_semicolon_list,
18265         @has_old_logical_breakpoints,   @rand_or_list,
18266         @i_equals,                      @override_cab3,
18267         @type_sequence_stack,
18268     );
18269
18270     # these arrays must retain values between calls
18271     my ( @has_broken_sublist, @dont_align, @want_comma_break );
18272
18273     my $length_tol;
18274     my $lp_tol_boost;
18275     my $list_stress_level;
18276
18277     sub initialize_break_lists {
18278         @dont_align         = ();
18279         @has_broken_sublist = ();
18280         @want_comma_break   = ();
18281
18282         #---------------------------------------------------
18283         # Set tolerances to prevent formatting instabilities
18284         #---------------------------------------------------
18285
18286         # Define tolerances to use when checking if closed
18287         # containers will fit on one line.  This is necessary to avoid
18288         # formatting instability. The basic tolerance is based on the
18289         # following:
18290
18291         # - Always allow for at least one extra space after a closing token so
18292         # that we do not strand a comma or semicolon. (oneline.t).
18293
18294         # - Use an increased line length tolerance when -ci > -i to avoid
18295         # blinking states (case b923 and others).
18296         $length_tol =
18297           1 + max( 0, $rOpts_continuation_indentation - $rOpts_indent_columns );
18298
18299         # In addition, it may be necessary to use a few extra tolerance spaces
18300         # when -lp is used and/or when -xci is used.  The history of this
18301         # so far is as follows:
18302
18303         # FIX1: At least 3 characters were been found to be required for -lp
18304         # to fixes cases b1059 b1063 b1117.
18305
18306         # FIX2: Further testing showed that we need a total of 3 extra spaces
18307         # when -lp is set for non-lists, and at least 2 spaces when -lp and
18308         # -xci are set.
18309         # Fixes cases b1063 b1103 b1134 b1135 b1136 b1138 b1140 b1143 b1144
18310         # b1145 b1146 b1147 b1148 b1151 b1152 b1153 b1154 b1156 b1157 b1164
18311         # b1165
18312
18313         # FIX3: To fix cases b1169 b1170 b1171, an update was made in sub
18314         # 'find_token_starting_list' to go back before an initial blank space.
18315         # This fixed these three cases, and allowed the tolerances to be
18316         # reduced to continue to fix all other known cases of instability.
18317         # This gives the current tolerance formulation.
18318
18319         $lp_tol_boost = 0;
18320
18321         if ($rOpts_line_up_parentheses) {
18322
18323             # boost tol for combination -lp -xci
18324             if ($rOpts_extended_continuation_indentation) {
18325                 $lp_tol_boost = 2;
18326             }
18327
18328             # boost tol for combination -lp and any -vtc > 0, but only for
18329             # non-list containers
18330             else {
18331                 foreach ( keys %closing_vertical_tightness ) {
18332                     next
18333                       unless ( $closing_vertical_tightness{$_} );
18334                     $lp_tol_boost = 1;    # Fixes B1193;
18335                     last;
18336                 }
18337             }
18338         }
18339
18340         # Define a level where list formatting becomes highly stressed and
18341         # needs to be simplified. Introduced for case b1262.
18342         $list_stress_level = min( $stress_level_alpha, $stress_level_beta + 2 );
18343
18344         return;
18345     } ## end sub initialize_break_lists
18346
18347     # routine to define essential variables when we go 'up' to
18348     # a new depth
18349     sub check_for_new_minimum_depth {
18350         my ( $self, $depth_t, $seqno ) = @_;
18351         if ( $depth_t < $minimum_depth ) {
18352
18353             $minimum_depth = $depth_t;
18354
18355             # these arrays need not retain values between calls
18356             $type_sequence_stack[$depth_t] = $seqno;
18357             $override_cab3[$depth_t] =
18358                  $rOpts_comma_arrow_breakpoints == 3
18359               && $seqno
18360               && $self->[_roverride_cab3_]->{$seqno};
18361
18362             $override_cab3[$depth_t]          = undef;
18363             $breakpoint_stack[$depth_t]       = $starting_breakpoint_count;
18364             $container_type[$depth_t]         = EMPTY_STRING;
18365             $identifier_count_stack[$depth_t] = 0;
18366             $index_before_arrow[$depth_t]     = -1;
18367             $interrupted_list[$depth_t]       = 1;
18368             $item_count_stack[$depth_t]       = 0;
18369             $last_nonblank_type[$depth_t]     = EMPTY_STRING;
18370             $opening_structure_index_stack[$depth_t] = -1;
18371
18372             $breakpoint_undo_stack[$depth_t]       = undef;
18373             $comma_index[$depth_t]                 = undef;
18374             $last_comma_index[$depth_t]            = undef;
18375             $last_dot_index[$depth_t]              = undef;
18376             $old_breakpoint_count_stack[$depth_t]  = undef;
18377             $has_old_logical_breakpoints[$depth_t] = 0;
18378             $rand_or_list[$depth_t]                = [];
18379             $rfor_semicolon_list[$depth_t]         = [];
18380             $i_equals[$depth_t]                    = -1;
18381
18382             # these arrays must retain values between calls
18383             if ( !defined( $has_broken_sublist[$depth_t] ) ) {
18384                 $dont_align[$depth_t]         = 0;
18385                 $has_broken_sublist[$depth_t] = 0;
18386                 $want_comma_break[$depth_t]   = 0;
18387             }
18388         }
18389         return;
18390     } ## end sub check_for_new_minimum_depth
18391
18392     # routine to decide which commas to break at within a container;
18393     # returns:
18394     #   $bp_count = number of comma breakpoints set
18395     #   $do_not_break_apart = a flag indicating if container need not
18396     #     be broken open
18397     sub set_comma_breakpoints {
18398
18399         my ( $self, $dd, $rbond_strength_bias ) = @_;
18400         my $bp_count           = 0;
18401         my $do_not_break_apart = 0;
18402
18403         # Do not break a list unless there are some non-line-ending commas.
18404         # This avoids getting different results with only non-essential commas,
18405         # and fixes b1192.
18406         my $seqno = $type_sequence_stack[$dd];
18407         my $real_comma_count =
18408           $seqno ? $self->[_rtype_count_by_seqno_]->{$seqno}->{','} : 1;
18409
18410         # anything to do?
18411         if ( $item_count_stack[$dd] ) {
18412
18413             # handle commas not in containers...
18414             if ( $dont_align[$dd] ) {
18415                 $self->do_uncontained_comma_breaks( $dd, $rbond_strength_bias );
18416             }
18417
18418             # handle commas within containers...
18419             elsif ($real_comma_count) {
18420                 my $fbc = $forced_breakpoint_count;
18421
18422                 # always open comma lists not preceded by keywords,
18423                 # barewords, identifiers (that is, anything that doesn't
18424                 # look like a function call)
18425                 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
18426
18427                 $self->set_comma_breakpoints_do(
18428                     {
18429                         depth            => $dd,
18430                         i_opening_paren  => $opening_structure_index_stack[$dd],
18431                         i_closing_paren  => $i,
18432                         item_count       => $item_count_stack[$dd],
18433                         identifier_count => $identifier_count_stack[$dd],
18434                         rcomma_index     => $comma_index[$dd],
18435                         next_nonblank_type  => $next_nonblank_type,
18436                         list_type           => $container_type[$dd],
18437                         interrupted         => $interrupted_list[$dd],
18438                         rdo_not_break_apart => \$do_not_break_apart,
18439                         must_break_open     => $must_break_open,
18440                         has_broken_sublist  => $has_broken_sublist[$dd],
18441                     }
18442                 );
18443                 $bp_count           = $forced_breakpoint_count - $fbc;
18444                 $do_not_break_apart = 0 if $must_break_open;
18445             }
18446         }
18447         return ( $bp_count, $do_not_break_apart );
18448     } ## end sub set_comma_breakpoints
18449
18450     # These types are excluded at breakpoints to prevent blinking
18451     # Switched from excluded to included as part of fix for b1214
18452     my %is_uncontained_comma_break_included_type;
18453
18454     BEGIN {
18455
18456         my @q = qw< k R } ) ] Y Z U w i q Q .
18457           = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=>;
18458         @is_uncontained_comma_break_included_type{@q} = (1) x scalar(@q);
18459     }
18460
18461     sub do_uncontained_comma_breaks {
18462
18463         # Handle commas not in containers...
18464         # This is a catch-all routine for commas that we
18465         # don't know what to do with because the don't fall
18466         # within containers.  We will bias the bond strength
18467         # to break at commas which ended lines in the input
18468         # file.  This usually works better than just trying
18469         # to put as many items on a line as possible.  A
18470         # downside is that if the input file is garbage it
18471         # won't work very well. However, the user can always
18472         # prevent following the old breakpoints with the
18473         # -iob flag.
18474         my ( $self, $dd, $rbond_strength_bias ) = @_;
18475
18476         # Check added for issue c131; an error here would be due to an
18477         # error initializing @comma_index when entering depth $dd.
18478         if (DEVEL_MODE) {
18479             foreach my $ii ( @{ $comma_index[$dd] } ) {
18480                 if ( $ii < 0 || $ii > $max_index_to_go ) {
18481                     my $KK  = $K_to_go[0];
18482                     my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_];
18483                     Fault(<<EOM);
18484 Bad comma index near line $lno: i=$ii must be between 0 and $max_index_to_go
18485 EOM
18486                 }
18487             }
18488         }
18489
18490         my $bias                  = -.01;
18491         my $old_comma_break_count = 0;
18492         foreach my $ii ( @{ $comma_index[$dd] } ) {
18493
18494             if ( $old_breakpoint_to_go[$ii] ) {
18495                 $old_comma_break_count++;
18496
18497                 # Store the bias info for use by sub set_bond_strength
18498                 push @{$rbond_strength_bias}, [ $ii, $bias ];
18499
18500                 # reduce bias magnitude to force breaks in order
18501                 $bias *= 0.99;
18502             }
18503         }
18504
18505         # Also put a break before the first comma if
18506         # (1) there was a break there in the input, and
18507         # (2) there was exactly one old break before the first comma break
18508         # (3) OLD: there are multiple old comma breaks
18509         # (3) NEW: there are one or more old comma breaks (see return example)
18510         # (4) the first comma is at the starting level ...
18511         #     ... fixes cases b064 b065 b068 b210 b747
18512         # (5) the batch does not start with a ci>0 [ignore a ci change by -xci]
18513         #     ... fixes b1220.  If ci>0 we are in the middle of a snippet,
18514         #     maybe because -boc has been forcing out previous lines.
18515
18516         # For example, we will follow the user and break after
18517         # 'print' in this snippet:
18518         #    print
18519         #      "conformability (Not the same dimension)\n",
18520         #      "\t", $have, " is ", text_unit($hu), "\n",
18521         #      "\t", $want, " is ", text_unit($wu), "\n",
18522         #      ;
18523         #
18524         # Another example, just one comma, where we will break after
18525         # the return:
18526         #  return
18527         #    $x * cos($a) - $y * sin($a),
18528         #    $x * sin($a) + $y * cos($a);
18529
18530         # Breaking a print statement:
18531         # print SAVEOUT
18532         #   ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
18533         #   ( $? & 128 ) ? " -- core dumped" : "", "\n";
18534         #
18535         #  But we will not force a break after the opening paren here
18536         #  (causes a blinker):
18537         #        $heap->{stream}->set_output_filter(
18538         #            poe::filter::reference->new('myotherfreezer') ),
18539         #          ;
18540         #
18541         my $i_first_comma = $comma_index[$dd]->[0];
18542         my $level_comma   = $levels_to_go[$i_first_comma];
18543         my $ci_start      = $ci_levels_to_go[0];
18544
18545         # Here we want to use the value of ci before any -xci adjustment
18546         if ( $ci_start && $rOpts_extended_continuation_indentation ) {
18547             my $K0 = $K_to_go[0];
18548             if ( $self->[_rseqno_controlling_my_ci_]->{$K0} ) { $ci_start = 0 }
18549         }
18550         if (  !$ci_start
18551             && $old_breakpoint_to_go[$i_first_comma]
18552             && $level_comma == $levels_to_go[0] )
18553         {
18554             my $ibreak    = -1;
18555             my $obp_count = 0;
18556             foreach my $ii ( reverse( 0 .. $i_first_comma - 1 ) ) {
18557                 if ( $old_breakpoint_to_go[$ii] ) {
18558                     $obp_count++;
18559                     last if ( $obp_count > 1 );
18560                     $ibreak = $ii
18561                       if ( $levels_to_go[$ii] == $level_comma );
18562                 }
18563             }
18564
18565             # Changed rule from multiple old commas to just one here:
18566             if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
18567             {
18568                 my $ibreak_m = $ibreak;
18569                 $ibreak_m-- if ( $types_to_go[$ibreak_m] eq 'b' );
18570                 if ( $ibreak_m >= 0 ) {
18571
18572                     # In order to avoid blinkers we have to be fairly
18573                     # restrictive:
18574
18575                     # OLD Rules:
18576                     #  Rule 1: Do not to break before an opening token
18577                     #  Rule 2: avoid breaking at ternary operators
18578                     #  (see b931, which is similar to the above print example)
18579                     #  Rule 3: Do not break at chain operators to fix case b1119
18580                     #   - The previous test was '$typem !~ /^[\(\{\[L\?\:]$/'
18581
18582                     # NEW Rule, replaced above rules after case b1214:
18583                     #  only break at one of the included types
18584
18585                     # Be sure to test any changes to these rules against runs
18586                     # with -l=0 such as the 'bbvt' test (perltidyrc_colin)
18587                     # series.
18588                     my $type_m = $types_to_go[$ibreak_m];
18589
18590                     # Switched from excluded to included for b1214. If necessary
18591                     # the token could also be checked if type_m eq 'k'
18592                     if ( $is_uncontained_comma_break_included_type{$type_m} ) {
18593                         $self->set_forced_breakpoint($ibreak);
18594                     }
18595                 }
18596             }
18597         }
18598         return;
18599     } ## end sub do_uncontained_comma_breaks
18600
18601     my %is_logical_container;
18602     my %quick_filter;
18603
18604     BEGIN {
18605         my @q = qw# if elsif unless while and or err not && | || ? : ! #;
18606         @is_logical_container{@q} = (1) x scalar(@q);
18607
18608         # This filter will allow most tokens to skip past a section of code
18609         %quick_filter = %is_assignment;
18610         @q            = qw# => . ; < > ~ #;
18611         push @q, ',';
18612         @quick_filter{@q} = (1) x scalar(@q);
18613     }
18614
18615     sub set_for_semicolon_breakpoints {
18616         my ( $self, $dd ) = @_;
18617         foreach ( @{ $rfor_semicolon_list[$dd] } ) {
18618             $self->set_forced_breakpoint($_);
18619         }
18620         return;
18621     }
18622
18623     sub set_logical_breakpoints {
18624         my ( $self, $dd ) = @_;
18625         if (
18626                $item_count_stack[$dd] == 0
18627             && $is_logical_container{ $container_type[$dd] }
18628
18629             || $has_old_logical_breakpoints[$dd]
18630           )
18631         {
18632
18633             # Look for breaks in this order:
18634             # 0   1    2   3
18635             # or  and  ||  &&
18636             foreach my $i ( 0 .. 3 ) {
18637                 if ( $rand_or_list[$dd][$i] ) {
18638                     foreach ( @{ $rand_or_list[$dd][$i] } ) {
18639                         $self->set_forced_breakpoint($_);
18640                     }
18641
18642                     # break at any 'if' and 'unless' too
18643                     foreach ( @{ $rand_or_list[$dd][4] } ) {
18644                         $self->set_forced_breakpoint($_);
18645                     }
18646                     $rand_or_list[$dd] = [];
18647                     last;
18648                 }
18649             }
18650         }
18651         return;
18652     } ## end sub set_logical_breakpoints
18653
18654     sub is_unbreakable_container {
18655
18656         # never break a container of one of these types
18657         # because bad things can happen (map1.t)
18658         my $dd = shift;
18659         return $is_sort_map_grep{ $container_type[$dd] };
18660     }
18661
18662     sub break_lists {
18663
18664         my ( $self, $is_long_line, $rbond_strength_bias ) = @_;
18665
18666         #----------------------------------------------------------------------
18667         # This routine is called once per batch, if the batch is a list, to set
18668         # line breaks so that hierarchical structure can be displayed and so
18669         # that list items can be vertically aligned.  The output of this
18670         # routine is stored in the array @forced_breakpoint_to_go, which is
18671         # used by sub 'break_long_lines' to set final breakpoints.
18672         #----------------------------------------------------------------------
18673
18674         my $rLL                  = $self->[_rLL_];
18675         my $ris_list_by_seqno    = $self->[_ris_list_by_seqno_];
18676         my $ris_broken_container = $self->[_ris_broken_container_];
18677         my $rbreak_before_container_by_seqno =
18678           $self->[_rbreak_before_container_by_seqno_];
18679
18680         $starting_depth = $nesting_depth_to_go[0];
18681
18682         $block_type                = SPACE;
18683         $current_depth             = $starting_depth;
18684         $i                         = -1;
18685         $last_nonblank_token       = ';';
18686         $last_nonblank_type        = ';';
18687         $last_nonblank_block_type  = SPACE;
18688         $last_old_breakpoint_count = 0;
18689         $minimum_depth = $current_depth + 1;    # forces update in check below
18690         $old_breakpoint_count      = 0;
18691         $starting_breakpoint_count = $forced_breakpoint_count;
18692         $token                     = ';';
18693         $type                      = ';';
18694         $type_sequence             = EMPTY_STRING;
18695
18696         my $total_depth_variation = 0;
18697         my $i_old_assignment_break;
18698         my $depth_last = $starting_depth;
18699         my $comma_follows_last_closing_token;
18700
18701         $self->check_for_new_minimum_depth( $current_depth,
18702             $parent_seqno_to_go[0] );
18703
18704         my $want_previous_breakpoint = -1;
18705
18706         my $saw_good_breakpoint;
18707         my $i_line_end   = -1;
18708         my $i_line_start = -1;
18709         my $i_last_colon = -1;
18710
18711         #----------------------------------------
18712         # Main loop over all tokens in this batch
18713         #----------------------------------------
18714         while ( ++$i <= $max_index_to_go ) {
18715             if ( $type ne 'b' ) {
18716                 $i_last_nonblank_token    = $i - 1;
18717                 $last_nonblank_type       = $type;
18718                 $last_nonblank_token      = $token;
18719                 $last_nonblank_block_type = $block_type;
18720             } ## end if ( $type ne 'b' )
18721             $type          = $types_to_go[$i];
18722             $block_type    = $block_type_to_go[$i];
18723             $token         = $tokens_to_go[$i];
18724             $type_sequence = $type_sequence_to_go[$i];
18725             my $next_type       = $types_to_go[ $i + 1 ];
18726             my $next_token      = $tokens_to_go[ $i + 1 ];
18727             my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
18728             $next_nonblank_type       = $types_to_go[$i_next_nonblank];
18729             $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
18730             $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
18731
18732             # set break if flag was set
18733             if ( $want_previous_breakpoint >= 0 ) {
18734                 $self->set_forced_breakpoint($want_previous_breakpoint);
18735                 $want_previous_breakpoint = -1;
18736             }
18737
18738             $last_old_breakpoint_count = $old_breakpoint_count;
18739
18740             # Fixed for case b1097 to not consider old breaks at highly
18741             # stressed locations, such as types 'L' and 'R'.  It might be
18742             # useful to generalize this concept in the future by looking at
18743             # actual bond strengths.
18744             if (   $old_breakpoint_to_go[$i]
18745                 && $type ne 'L'
18746                 && $next_nonblank_type ne 'R' )
18747             {
18748                 $i_line_end   = $i;
18749                 $i_line_start = $i_next_nonblank;
18750
18751                 $old_breakpoint_count++;
18752
18753                 # Break before certain keywords if user broke there and
18754                 # this is a 'safe' break point. The idea is to retain
18755                 # any preferred breaks for sequential list operations,
18756                 # like a schwartzian transform.
18757                 if ($rOpts_break_at_old_keyword_breakpoints) {
18758                     if (
18759                            $next_nonblank_type eq 'k'
18760                         && $is_keyword_returning_list{$next_nonblank_token}
18761                         && (   $type =~ /^[=\)\]\}Riw]$/
18762                             || $type eq 'k'
18763                             && $is_keyword_returning_list{$token} )
18764                       )
18765                     {
18766
18767                         # we actually have to set this break next time through
18768                         # the loop because if we are at a closing token (such
18769                         # as '}') which forms a one-line block, this break might
18770                         # get undone.
18771
18772                         # And do not do this at an equals if the user wants
18773                         # breaks before an equals (blinker cases b434 b903)
18774                         unless ( $type eq '=' && $want_break_before{$type} ) {
18775                             $want_previous_breakpoint = $i;
18776                         }
18777                     } ## end if ( $next_nonblank_type...)
18778                 } ## end if ($rOpts_break_at_old_keyword_breakpoints)
18779
18780                 # Break before attributes if user broke there
18781                 if ($rOpts_break_at_old_attribute_breakpoints) {
18782                     if ( $next_nonblank_type eq 'A' ) {
18783                         $want_previous_breakpoint = $i;
18784                     }
18785                 }
18786
18787                 # remember an = break as possible good break point
18788                 if ( $is_assignment{$type} ) {
18789                     $i_old_assignment_break = $i;
18790                 }
18791                 elsif ( $is_assignment{$next_nonblank_type} ) {
18792                     $i_old_assignment_break = $i_next_nonblank;
18793                 }
18794             } ## end if ( $old_breakpoint_to_go...)
18795
18796             next if ( $type eq 'b' );
18797             $depth = $nesting_depth_to_go[ $i + 1 ];
18798
18799             $total_depth_variation += abs( $depth - $depth_last );
18800             $depth_last = $depth;
18801
18802             # safety check - be sure we always break after a comment
18803             # Shouldn't happen .. an error here probably means that the
18804             # nobreak flag did not get turned off correctly during
18805             # formatting.
18806             if ( $type eq '#' ) {
18807                 if ( $i != $max_index_to_go ) {
18808                     if (DEVEL_MODE) {
18809                         Fault(<<EOM);
18810 Non-fatal program bug: backup logic required to break after a comment
18811 EOM
18812                     }
18813                     $nobreak_to_go[$i] = 0;
18814                     $self->set_forced_breakpoint($i);
18815                 } ## end if ( $i != $max_index_to_go)
18816             } ## end if ( $type eq '#' )
18817
18818             # Force breakpoints at certain tokens in long lines.
18819             # Note that such breakpoints will be undone later if these tokens
18820             # are fully contained within parens on a line.
18821             if (
18822
18823                 # break before a keyword within a line
18824                 $type eq 'k'
18825                 && $i > 0
18826
18827                 # if one of these keywords:
18828                 && $is_if_unless_while_until_for_foreach{$token}
18829
18830                 # but do not break at something like '1 while'
18831                 && ( $last_nonblank_type ne 'n' || $i > 2 )
18832
18833                 # and let keywords follow a closing 'do' brace
18834                 && $last_nonblank_block_type ne 'do'
18835
18836                 && (
18837                     $is_long_line
18838
18839                     # or container is broken (by side-comment, etc)
18840                     || (   $next_nonblank_token eq '('
18841                         && $mate_index_to_go[$i_next_nonblank] < $i )
18842                 )
18843               )
18844             {
18845                 $self->set_forced_breakpoint( $i - 1 );
18846             } ## end if ( $type eq 'k' && $i...)
18847
18848             # remember locations of '||'  and '&&' for possible breaks if we
18849             # decide this is a long logical expression.
18850             if ( $type eq '||' ) {
18851                 push @{ $rand_or_list[$depth][2] }, $i;
18852                 ++$has_old_logical_breakpoints[$depth]
18853                   if ( ( $i == $i_line_start || $i == $i_line_end )
18854                     && $rOpts_break_at_old_logical_breakpoints );
18855             } ## end elsif ( $type eq '||' )
18856             elsif ( $type eq '&&' ) {
18857                 push @{ $rand_or_list[$depth][3] }, $i;
18858                 ++$has_old_logical_breakpoints[$depth]
18859                   if ( ( $i == $i_line_start || $i == $i_line_end )
18860                     && $rOpts_break_at_old_logical_breakpoints );
18861             } ## end elsif ( $type eq '&&' )
18862             elsif ( $type eq 'f' ) {
18863                 push @{ $rfor_semicolon_list[$depth] }, $i;
18864             }
18865             elsif ( $type eq 'k' ) {
18866                 if ( $token eq 'and' ) {
18867                     push @{ $rand_or_list[$depth][1] }, $i;
18868                     ++$has_old_logical_breakpoints[$depth]
18869                       if ( ( $i == $i_line_start || $i == $i_line_end )
18870                         && $rOpts_break_at_old_logical_breakpoints );
18871                 } ## end if ( $token eq 'and' )
18872
18873                 # break immediately at 'or's which are probably not in a logical
18874                 # block -- but we will break in logical breaks below so that
18875                 # they do not add to the forced_breakpoint_count
18876                 elsif ( $token eq 'or' ) {
18877                     push @{ $rand_or_list[$depth][0] }, $i;
18878                     ++$has_old_logical_breakpoints[$depth]
18879                       if ( ( $i == $i_line_start || $i == $i_line_end )
18880                         && $rOpts_break_at_old_logical_breakpoints );
18881                     if ( $is_logical_container{ $container_type[$depth] } ) {
18882                     }
18883                     else {
18884                         if ($is_long_line) { $self->set_forced_breakpoint($i) }
18885                         elsif ( ( $i == $i_line_start || $i == $i_line_end )
18886                             && $rOpts_break_at_old_logical_breakpoints )
18887                         {
18888                             $saw_good_breakpoint = 1;
18889                         }
18890                     } ## end else [ if ( $is_logical_container...)]
18891                 } ## end elsif ( $token eq 'or' )
18892                 elsif ( $token eq 'if' || $token eq 'unless' ) {
18893                     push @{ $rand_or_list[$depth][4] }, $i;
18894                     if ( ( $i == $i_line_start || $i == $i_line_end )
18895                         && $rOpts_break_at_old_logical_breakpoints )
18896                     {
18897                         $self->set_forced_breakpoint($i);
18898                     }
18899                 } ## end elsif ( $token eq 'if' ||...)
18900             } ## end elsif ( $type eq 'k' )
18901             elsif ( $is_assignment{$type} ) {
18902                 $i_equals[$depth] = $i;
18903             }
18904
18905             if ($type_sequence) {
18906
18907                 # handle any postponed closing breakpoints
18908                 if ( $is_closing_sequence_token{$token} ) {
18909                     if ( $type eq ':' ) {
18910                         $i_last_colon = $i;
18911
18912                         # retain break at a ':' line break
18913                         if (   ( $i == $i_line_start || $i == $i_line_end )
18914                             && $rOpts_break_at_old_ternary_breakpoints
18915                             && $levels_to_go[$i] < $list_stress_level )
18916                         {
18917
18918                             $self->set_forced_breakpoint($i);
18919
18920                             # Break at a previous '=', but only if it is before
18921                             # the mating '?'. Mate_index test fixes b1287.
18922                             my $ieq = $i_equals[$depth];
18923                             if ( $ieq > 0 && $ieq < $mate_index_to_go[$i] ) {
18924                                 $self->set_forced_breakpoint(
18925                                     $i_equals[$depth] );
18926                                 $i_equals[$depth] = -1;
18927                             }
18928                         } ## end if ( ( $i == $i_line_start...))
18929                     } ## end if ( $type eq ':' )
18930                     if ( has_postponed_breakpoint($type_sequence) ) {
18931                         my $inc = ( $type eq ':' ) ? 0 : 1;
18932                         if ( $i >= $inc ) {
18933                             $self->set_forced_breakpoint( $i - $inc );
18934                         }
18935                     }
18936                 } ## end if ( $is_closing_sequence_token{$token} )
18937
18938                 # set breaks at ?/: if they will get separated (and are
18939                 # not a ?/: chain), or if the '?' is at the end of the
18940                 # line
18941                 elsif ( $token eq '?' ) {
18942                     my $i_colon = $mate_index_to_go[$i];
18943                     if (
18944                         $i_colon <= 0  # the ':' is not in this batch
18945                         || $i == 0     # this '?' is the first token of the line
18946                         || $i ==
18947                         $max_index_to_go    # or this '?' is the last token
18948                       )
18949                     {
18950
18951                         # don't break if # this has a side comment, and
18952                         # don't break at a '?' if preceded by ':' on
18953                         # this line of previous ?/: pair on this line.
18954                         # This is an attempt to preserve a chain of ?/:
18955                         # expressions (elsif2.t).
18956                         if (
18957                             (
18958                                    $i_last_colon < 0
18959                                 || $parent_seqno_to_go[$i_last_colon] !=
18960                                 $parent_seqno_to_go[$i]
18961                             )
18962                             && $tokens_to_go[$max_index_to_go] ne '#'
18963                           )
18964                         {
18965                             $self->set_forced_breakpoint($i);
18966                         }
18967                         $self->set_closing_breakpoint($i);
18968                     } ## end if ( $i_colon <= 0  ||...)
18969                 } ## end elsif ( $token eq '?' )
18970
18971                 elsif ( $is_opening_token{$token} ) {
18972
18973                     # do requested -lp breaks at the OPENING token for BROKEN
18974                     # blocks.  NOTE: this can be done for both -lp and -xlp,
18975                     # but only -xlp can really take advantage of this.  So this
18976                     # is currently restricted to -xlp to avoid excess changes to
18977                     # existing -lp formatting.
18978                     if (   $rOpts_extended_line_up_parentheses
18979                         && $mate_index_to_go[$i] < 0 )
18980                     {
18981                         my $lp_object =
18982                           $self->[_rlp_object_by_seqno_]->{$type_sequence};
18983                         if ($lp_object) {
18984                             my $K_begin_line = $lp_object->get_K_begin_line();
18985                             my $i_begin_line = $K_begin_line - $K_to_go[0];
18986                             $self->set_forced_lp_break( $i_begin_line, $i );
18987                         }
18988                     }
18989                 }
18990
18991             } ## end if ($type_sequence)
18992
18993 #print "LISTX sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth\n";
18994
18995             #------------------------------------------------------------
18996             # Handle Increasing Depth..
18997             #
18998             # prepare for a new list when depth increases
18999             # token $i is a '(','{', or '['
19000             #------------------------------------------------------------
19001             # hardened against bad input syntax: depth jump must be 1 and type
19002             # must be opening..fixes c102
19003             if ( $depth == $current_depth + 1 && $is_opening_type{$type} ) {
19004
19005                 #----------------------------------------------------------
19006                 # BEGIN initialize depth arrays
19007                 # ... use the same order as sub check_for_new_minimum_depth
19008                 #----------------------------------------------------------
19009                 $type_sequence_stack[$depth] = $type_sequence;
19010                 $override_cab3[$depth] =
19011                      $rOpts_comma_arrow_breakpoints == 3
19012                   && $type_sequence
19013                   && $self->[_roverride_cab3_]->{$type_sequence};
19014
19015                 $breakpoint_stack[$depth] = $forced_breakpoint_count;
19016                 $container_type[$depth] =
19017
19018                   #      k => && || ? : .
19019                   $is_container_label_type{$last_nonblank_type}
19020                   ? $last_nonblank_token
19021                   : EMPTY_STRING;
19022                 $identifier_count_stack[$depth]        = 0;
19023                 $index_before_arrow[$depth]            = -1;
19024                 $interrupted_list[$depth]              = 0;
19025                 $item_count_stack[$depth]              = 0;
19026                 $last_nonblank_type[$depth]            = $last_nonblank_type;
19027                 $opening_structure_index_stack[$depth] = $i;
19028
19029                 $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
19030                 $comma_index[$depth]           = undef;
19031                 $last_comma_index[$depth]      = undef;
19032                 $last_dot_index[$depth]        = undef;
19033                 $old_breakpoint_count_stack[$depth]  = $old_breakpoint_count;
19034                 $has_old_logical_breakpoints[$depth] = 0;
19035                 $rand_or_list[$depth]                = [];
19036                 $rfor_semicolon_list[$depth]         = [];
19037                 $i_equals[$depth]                    = -1;
19038
19039                 # if line ends here then signal closing token to break
19040                 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
19041                 {
19042                     $self->set_closing_breakpoint($i);
19043                 }
19044
19045                 # Not all lists of values should be vertically aligned..
19046                 $dont_align[$depth] =
19047
19048                   # code BLOCKS are handled at a higher level
19049                   ( $block_type ne EMPTY_STRING )
19050
19051                   # certain paren lists
19052                   || ( $type eq '(' ) && (
19053
19054                     # it does not usually look good to align a list of
19055                     # identifiers in a parameter list, as in:
19056                     #    my($var1, $var2, ...)
19057                     # (This test should probably be refined, for now I'm just
19058                     # testing for any keyword)
19059                     ( $last_nonblank_type eq 'k' )
19060
19061                     # a trailing '(' usually indicates a non-list
19062                     || ( $next_nonblank_type eq '(' )
19063                   );
19064                 $has_broken_sublist[$depth] = 0;
19065                 $want_comma_break[$depth]   = 0;
19066
19067                 #-------------------------------------
19068                 # END initialize depth arrays
19069                 #-------------------------------------
19070
19071                 # patch to outdent opening brace of long if/for/..
19072                 # statements (like this one).  See similar coding in
19073                 # set_continuation breaks.  We have also catch it here for
19074                 # short line fragments which otherwise will not go through
19075                 # break_long_lines.
19076                 if (
19077                     $block_type
19078
19079                     # if we have the ')' but not its '(' in this batch..
19080                     && ( $last_nonblank_token eq ')' )
19081                     && $mate_index_to_go[$i_last_nonblank_token] < 0
19082
19083                     # and user wants brace to left
19084                     && !$rOpts_opening_brace_always_on_right
19085
19086                     && ( $type eq '{' )     # should be true
19087                     && ( $token eq '{' )    # should be true
19088                   )
19089                 {
19090                     $self->set_forced_breakpoint( $i - 1 );
19091                 } ## end if ( $block_type && ( ...))
19092             } ## end if ( $depth > $current_depth)
19093
19094             #------------------------------------------------------------
19095             # Handle Decreasing Depth..
19096             #
19097             # finish off any old list when depth decreases
19098             # token $i is a ')','}', or ']'
19099             #------------------------------------------------------------
19100             # hardened against bad input syntax: depth jump must be 1 and type
19101             # must be closing .. fixes c102
19102             elsif ( $depth == $current_depth - 1 && $is_closing_type{$type} ) {
19103
19104                 $self->check_for_new_minimum_depth( $depth,
19105                     $parent_seqno_to_go[$i] );
19106
19107                 $comma_follows_last_closing_token =
19108                   $next_nonblank_type eq ',' || $next_nonblank_type eq '=>';
19109
19110                 # force all outer logical containers to break after we see on
19111                 # old breakpoint
19112                 $has_old_logical_breakpoints[$depth] ||=
19113                   $has_old_logical_breakpoints[$current_depth];
19114
19115                 # Patch to break between ') {' if the paren list is broken.
19116                 # There is similar logic in break_long_lines for
19117                 # non-broken lists.
19118                 if (   $token eq ')'
19119                     && $next_nonblank_block_type
19120                     && $interrupted_list[$current_depth]
19121                     && $next_nonblank_type eq '{'
19122                     && !$rOpts_opening_brace_always_on_right )
19123                 {
19124                     $self->set_forced_breakpoint($i);
19125                 } ## end if ( $token eq ')' && ...
19126
19127 #print "LISTY sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
19128
19129                 # set breaks at commas if necessary
19130                 my ( $bp_count, $do_not_break_apart ) =
19131                   $self->set_comma_breakpoints( $current_depth,
19132                     $rbond_strength_bias );
19133
19134                 my $i_opening = $opening_structure_index_stack[$current_depth];
19135                 my $saw_opening_structure = ( $i_opening >= 0 );
19136                 my $lp_object;
19137                 if ( $rOpts_line_up_parentheses && $saw_opening_structure ) {
19138                     $lp_object = $self->[_rlp_object_by_seqno_]
19139                       ->{ $type_sequence_to_go[$i_opening] };
19140                 }
19141
19142                 # this term is long if we had to break at interior commas..
19143                 my $is_long_term = $bp_count > 0;
19144
19145                 # If this is a short container with one or more comma arrows,
19146                 # then we will mark it as a long term to open it if requested.
19147                 # $rOpts_comma_arrow_breakpoints =
19148                 #    0 - open only if comma precedes closing brace
19149                 #    1 - stable: except for one line blocks
19150                 #    2 - try to form 1 line blocks
19151                 #    3 - ignore =>
19152                 #    4 - always open up if vt=0
19153                 #    5 - stable: even for one line blocks if vt=0
19154
19155                 # PATCH: Modify the -cab flag if we are not processing a list:
19156                 # We only want the -cab flag to apply to list containers, so
19157                 # for non-lists we use the default and stable -cab=5 value.
19158                 # Fixes case b939a.
19159                 my $cab_flag = $rOpts_comma_arrow_breakpoints;
19160                 if ( $type_sequence && !$ris_list_by_seqno->{$type_sequence} ) {
19161                     $cab_flag = 5;
19162                 }
19163
19164                 # Ignore old breakpoints when under stress.
19165                 # Fixes b1203 b1204 as well as b1197-b1200.
19166                 # But not if -lp: fixes b1264, b1265.  NOTE: rechecked with
19167                 # b1264 to see if this check is still required at all, and
19168                 # these still require a check, but at higher level beta+3
19169                 # instead of beta:  b1193 b780
19170                 if (   $saw_opening_structure
19171                     && !$lp_object
19172                     && $levels_to_go[$i_opening] >= $list_stress_level )
19173                 {
19174                     $cab_flag = 2;
19175
19176                     # Do not break hash braces under stress (fixes b1238)
19177                     $do_not_break_apart ||= $types_to_go[$i_opening] eq 'L';
19178
19179                     # This option fixes b1235, b1237, b1240 with old and new
19180                     # -lp, but formatting is nicer with next option.
19181                     ## $is_long_term ||=
19182                     ##  $levels_to_go[$i_opening] > $stress_level_beta + 1;
19183
19184                     # This option fixes b1240 but not b1235, b1237 with new -lp,
19185                     # but this gives better formatting than the previous option.
19186                     $do_not_break_apart ||=
19187                       $levels_to_go[$i_opening] > $stress_level_beta;
19188                 }
19189
19190                 if (  !$is_long_term
19191                     && $saw_opening_structure
19192                     && $is_opening_token{ $tokens_to_go[$i_opening] }
19193                     && $index_before_arrow[ $depth + 1 ] > 0
19194                     && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
19195                   )
19196                 {
19197                     $is_long_term =
19198                          $cab_flag == 4
19199                       || $cab_flag == 0 && $last_nonblank_token eq ','
19200                       || $cab_flag == 5 && $old_breakpoint_to_go[$i_opening];
19201                 } ## end if ( !$is_long_term &&...)
19202
19203                 # mark term as long if the length between opening and closing
19204                 # parens exceeds allowed line length
19205                 if ( !$is_long_term && $saw_opening_structure ) {
19206
19207                     my $i_opening_minus =
19208                       $self->find_token_starting_list($i_opening);
19209
19210                     my $excess =
19211                       $self->excess_line_length( $i_opening_minus, $i );
19212
19213                     # Use standard spaces for indentation of lists in -lp mode
19214                     # if it gives a longer line length. This helps to avoid an
19215                     # instability due to forming and breaking one-line blocks.
19216                     # This fixes case b1314.
19217                     my $indentation = $leading_spaces_to_go[$i_opening_minus];
19218                     if ( ref($indentation)
19219                         && $ris_broken_container->{$type_sequence} )
19220                     {
19221                         my $lp_spaces  = $indentation->get_spaces();
19222                         my $std_spaces = $indentation->get_standard_spaces();
19223                         my $diff       = $std_spaces - $lp_spaces;
19224                         if ( $diff > 0 ) { $excess += $diff }
19225                     }
19226
19227                     my $tol = $length_tol;
19228
19229                     # boost tol for an -lp container
19230                     if (
19231                            $lp_tol_boost
19232                         && $lp_object
19233                         && ( $rOpts_extended_continuation_indentation
19234                             || !$ris_list_by_seqno->{$type_sequence} )
19235                       )
19236                     {
19237                         $tol += $lp_tol_boost;
19238                     }
19239
19240                     # Patch to avoid blinking with -bbxi=2 and -cab=2
19241                     # in which variations in -ci cause unstable formatting
19242                     # in edge cases. We just always add one ci level so that
19243                     # the formatting is independent of the -BBX results.
19244                     # Fixes cases b1137 b1149 b1150 b1155 b1158 b1159 b1160
19245                     # b1161 b1166 b1167 b1168
19246                     if (  !$ci_levels_to_go[$i_opening]
19247                         && $rbreak_before_container_by_seqno->{$type_sequence} )
19248                     {
19249                         $tol += $rOpts->{'continuation-indentation'};
19250                     }
19251
19252                     $is_long_term = $excess + $tol > 0;
19253
19254                 } ## end if ( !$is_long_term &&...)
19255
19256                 # We've set breaks after all comma-arrows.  Now we have to
19257                 # undo them if this can be a one-line block
19258                 # (the only breakpoints set will be due to comma-arrows)
19259
19260                 if (
19261
19262                     # user doesn't require breaking after all comma-arrows
19263                     ( $cab_flag != 0 ) && ( $cab_flag != 4 )
19264
19265                     # and if the opening structure is in this batch
19266                     && $saw_opening_structure
19267
19268                     # and either on the same old line
19269                     && (
19270                         $old_breakpoint_count_stack[$current_depth] ==
19271                         $last_old_breakpoint_count
19272
19273                         # or user wants to form long blocks with arrows
19274                         || $cab_flag == 2
19275
19276                         # if -cab=3 is overridden then use -cab=2 behavior
19277                         || $cab_flag == 3 && $override_cab3[$current_depth]
19278                     )
19279
19280                     # and we made breakpoints between the opening and closing
19281                     && ( $breakpoint_undo_stack[$current_depth] <
19282                         $forced_breakpoint_undo_count )
19283
19284                     # and this block is short enough to fit on one line
19285                     # Note: use < because need 1 more space for possible comma
19286                     && !$is_long_term
19287
19288                   )
19289                 {
19290                     $self->undo_forced_breakpoint_stack(
19291                         $breakpoint_undo_stack[$current_depth] );
19292                 } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
19293
19294                 # now see if we have any comma breakpoints left
19295                 my $has_comma_breakpoints =
19296                   ( $breakpoint_stack[$current_depth] !=
19297                       $forced_breakpoint_count );
19298
19299                 # update broken-sublist flag of the outer container
19300                 $has_broken_sublist[$depth] =
19301                      $has_broken_sublist[$depth]
19302                   || $has_broken_sublist[$current_depth]
19303                   || $is_long_term
19304                   || $has_comma_breakpoints;
19305
19306 # Having come to the closing ')', '}', or ']', now we have to decide if we
19307 # should 'open up' the structure by placing breaks at the opening and
19308 # closing containers.  This is a tricky decision.  Here are some of the
19309 # basic considerations:
19310 #
19311 # -If this is a BLOCK container, then any breakpoints will have already
19312 # been set (and according to user preferences), so we need do nothing here.
19313 #
19314 # -If we have a comma-separated list for which we can align the list items,
19315 # then we need to do so because otherwise the vertical aligner cannot
19316 # currently do the alignment.
19317 #
19318 # -If this container does itself contain a container which has been broken
19319 # open, then it should be broken open to properly show the structure.
19320 #
19321 # -If there is nothing to align, and no other reason to break apart,
19322 # then do not do it.
19323 #
19324 # We will not break open the parens of a long but 'simple' logical expression.
19325 # For example:
19326 #
19327 # This is an example of a simple logical expression and its formatting:
19328 #
19329 #     if ( $bigwasteofspace1 && $bigwasteofspace2
19330 #         || $bigwasteofspace3 && $bigwasteofspace4 )
19331 #
19332 # Most people would prefer this than the 'spacey' version:
19333 #
19334 #     if (
19335 #         $bigwasteofspace1 && $bigwasteofspace2
19336 #         || $bigwasteofspace3 && $bigwasteofspace4
19337 #     )
19338 #
19339 # To illustrate the rules for breaking logical expressions, consider:
19340 #
19341 #             FULLY DENSE:
19342 #             if ( $opt_excl
19343 #                 and ( exists $ids_excl_uc{$id_uc}
19344 #                     or grep $id_uc =~ /$_/, @ids_excl_uc ))
19345 #
19346 # This is on the verge of being difficult to read.  The current default is to
19347 # open it up like this:
19348 #
19349 #             DEFAULT:
19350 #             if (
19351 #                 $opt_excl
19352 #                 and ( exists $ids_excl_uc{$id_uc}
19353 #                     or grep $id_uc =~ /$_/, @ids_excl_uc )
19354 #               )
19355 #
19356 # This is a compromise which tries to avoid being too dense and to spacey.
19357 # A more spaced version would be:
19358 #
19359 #             SPACEY:
19360 #             if (
19361 #                 $opt_excl
19362 #                 and (
19363 #                     exists $ids_excl_uc{$id_uc}
19364 #                     or grep $id_uc =~ /$_/, @ids_excl_uc
19365 #                 )
19366 #               )
19367 #
19368 # Some people might prefer the spacey version -- an option could be added.  The
19369 # innermost expression contains a long block '( exists $ids_...  ')'.
19370 #
19371 # Here is how the logic goes: We will force a break at the 'or' that the
19372 # innermost expression contains, but we will not break apart its opening and
19373 # closing containers because (1) it contains no multi-line sub-containers itself,
19374 # and (2) there is no alignment to be gained by breaking it open like this
19375 #
19376 #             and (
19377 #                 exists $ids_excl_uc{$id_uc}
19378 #                 or grep $id_uc =~ /$_/, @ids_excl_uc
19379 #             )
19380 #
19381 # (although this looks perfectly ok and might be good for long expressions).  The
19382 # outer 'if' container, though, contains a broken sub-container, so it will be
19383 # broken open to avoid too much density.  Also, since it contains no 'or's, there
19384 # will be a forced break at its 'and'.
19385
19386                 # Open-up if parens if requested. We do this by pretending we
19387                 # did not see the opening structure, since in that case parens
19388                 # always get opened up.
19389                 if (   $saw_opening_structure
19390                     && $rOpts_break_open_compact_parens )
19391                 {
19392
19393                     # This parameter is a one-character flag, as follows:
19394                     #  '0' matches no parens  -> break open NOT OK
19395                     #  '1' matches all parens -> break open OK
19396                     #  Other values are same as used by the weld-exclusion-list
19397                     my $flag = $rOpts_break_open_compact_parens;
19398                     if (   $flag eq '*'
19399                         || $flag eq '1' )
19400                     {
19401                         $saw_opening_structure = 0;
19402                     }
19403                     else {
19404                         my $KK = $K_to_go[$i_opening];
19405                         $saw_opening_structure =
19406                           !$self->match_paren_flag( $KK, $flag );
19407                     }
19408                 }
19409
19410                 # set some flags telling something about this container..
19411                 my $is_simple_logical_expression = 0;
19412                 if (   $item_count_stack[$current_depth] == 0
19413                     && $saw_opening_structure
19414                     && $tokens_to_go[$i_opening] eq '('
19415                     && $is_logical_container{ $container_type[$current_depth] }
19416                   )
19417                 {
19418
19419                     # This seems to be a simple logical expression with
19420                     # no existing breakpoints.  Set a flag to prevent
19421                     # opening it up.
19422                     if ( !$has_comma_breakpoints ) {
19423                         $is_simple_logical_expression = 1;
19424                     }
19425
19426                     # This seems to be a simple logical expression with
19427                     # breakpoints (broken sublists, for example).  Break
19428                     # at all 'or's and '||'s.
19429                     else {
19430                         $self->set_logical_breakpoints($current_depth);
19431                     }
19432                 } ## end if ( $item_count_stack...)
19433
19434                 if ( $is_long_term
19435                     && @{ $rfor_semicolon_list[$current_depth] } )
19436                 {
19437                     $self->set_for_semicolon_breakpoints($current_depth);
19438
19439                     # open up a long 'for' or 'foreach' container to allow
19440                     # leading term alignment unless -lp is used.
19441                     $has_comma_breakpoints = 1 unless ($lp_object);
19442                 } ## end if ( $is_long_term && ...)
19443
19444                 if (
19445
19446                     # breaks for code BLOCKS are handled at a higher level
19447                     !$block_type
19448
19449                     # we do not need to break at the top level of an 'if'
19450                     # type expression
19451                     && !$is_simple_logical_expression
19452
19453                     ## modification to keep ': (' containers vertically tight;
19454                     ## but probably better to let user set -vt=1 to avoid
19455                     ## inconsistency with other paren types
19456                     ## && ($container_type[$current_depth] ne ':')
19457
19458                     # otherwise, we require one of these reasons for breaking:
19459                     && (
19460
19461                         # - this term has forced line breaks
19462                         $has_comma_breakpoints
19463
19464                        # - the opening container is separated from this batch
19465                        #   for some reason (comment, blank line, code block)
19466                        # - this is a non-paren container spanning multiple lines
19467                         || !$saw_opening_structure
19468
19469                         # - this is a long block contained in another breakable
19470                         #   container
19471                         || $is_long_term && !$self->is_in_block_by_i($i_opening)
19472                     )
19473                   )
19474                 {
19475
19476                     # do special -lp breaks at the CLOSING token for INTACT
19477                     # blocks (because we might not do them if the block does
19478                     # not break open)
19479                     if ($lp_object) {
19480                         my $K_begin_line = $lp_object->get_K_begin_line();
19481                         my $i_begin_line = $K_begin_line - $K_to_go[0];
19482                         $self->set_forced_lp_break( $i_begin_line, $i_opening );
19483                     }
19484
19485                     # break after opening structure.
19486                     # note: break before closing structure will be automatic
19487                     if ( $minimum_depth <= $current_depth ) {
19488
19489                         if ( $i_opening >= 0 ) {
19490                             $self->set_forced_breakpoint($i_opening)
19491                               unless ( $do_not_break_apart
19492                                 || is_unbreakable_container($current_depth) );
19493                         }
19494
19495                         # break at ',' of lower depth level before opening token
19496                         if ( $last_comma_index[$depth] ) {
19497                             $self->set_forced_breakpoint(
19498                                 $last_comma_index[$depth] );
19499                         }
19500
19501                         # break at '.' of lower depth level before opening token
19502                         if ( $last_dot_index[$depth] ) {
19503                             $self->set_forced_breakpoint(
19504                                 $last_dot_index[$depth] );
19505                         }
19506
19507                         # break before opening structure if preceded by another
19508                         # closing structure and a comma.  This is normally
19509                         # done by the previous closing brace, but not
19510                         # if it was a one-line block.
19511                         if ( $i_opening > 2 ) {
19512                             my $i_prev =
19513                               ( $types_to_go[ $i_opening - 1 ] eq 'b' )
19514                               ? $i_opening - 2
19515                               : $i_opening - 1;
19516
19517                             if (
19518                                 $types_to_go[$i_prev] eq ','
19519                                 && (   $types_to_go[ $i_prev - 1 ] eq ')'
19520                                     || $types_to_go[ $i_prev - 1 ] eq '}' )
19521                               )
19522                             {
19523                                 $self->set_forced_breakpoint($i_prev);
19524                             }
19525
19526                             # also break before something like ':('  or '?('
19527                             # if appropriate.
19528                             elsif (
19529                                 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
19530                             {
19531                                 my $token_prev = $tokens_to_go[$i_prev];
19532                                 if ( $want_break_before{$token_prev} ) {
19533                                     $self->set_forced_breakpoint($i_prev);
19534                                 }
19535                             } ## end elsif ( $types_to_go[$i_prev...])
19536                         } ## end if ( $i_opening > 2 )
19537                     } ## end if ( $minimum_depth <=...)
19538
19539                     # break after comma following closing structure
19540                     if ( $next_type eq ',' ) {
19541                         $self->set_forced_breakpoint( $i + 1 );
19542                     }
19543
19544                     # break before an '=' following closing structure
19545                     if (
19546                         $is_assignment{$next_nonblank_type}
19547                         && ( $breakpoint_stack[$current_depth] !=
19548                             $forced_breakpoint_count )
19549                       )
19550                     {
19551                         $self->set_forced_breakpoint($i);
19552                     } ## end if ( $is_assignment{$next_nonblank_type...})
19553
19554                     # break at any comma before the opening structure Added
19555                     # for -lp, but seems to be good in general.  It isn't
19556                     # obvious how far back to look; the '5' below seems to
19557                     # work well and will catch the comma in something like
19558                     #  push @list, myfunc( $param, $param, ..
19559
19560                     my $icomma = $last_comma_index[$depth];
19561                     if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
19562                         unless ( $forced_breakpoint_to_go[$icomma] ) {
19563                             $self->set_forced_breakpoint($icomma);
19564                         }
19565                     }
19566                 } ## end logic to open up a container
19567
19568                 # Break open a logical container open if it was already open
19569                 elsif ($is_simple_logical_expression
19570                     && $has_old_logical_breakpoints[$current_depth] )
19571                 {
19572                     $self->set_logical_breakpoints($current_depth);
19573                 }
19574
19575                 # Handle long container which does not get opened up
19576                 elsif ($is_long_term) {
19577
19578                     # must set fake breakpoint to alert outer containers that
19579                     # they are complex
19580                     set_fake_breakpoint();
19581                 } ## end elsif ($is_long_term)
19582
19583             } ## end elsif ( $depth < $current_depth)
19584
19585             #------------------------------------------------------------
19586             # Handle this token
19587             #------------------------------------------------------------
19588
19589             $current_depth = $depth;
19590
19591             # most token types can skip the rest of this loop
19592             next unless ( $quick_filter{$type} );
19593
19594             # handle comma-arrow
19595             if ( $type eq '=>' ) {
19596                 next if ( $last_nonblank_type eq '=>' );
19597                 next if $rOpts_break_at_old_comma_breakpoints;
19598                 next
19599                   if ( $rOpts_comma_arrow_breakpoints == 3
19600                     && !$override_cab3[$depth] );
19601                 $want_comma_break[$depth]   = 1;
19602                 $index_before_arrow[$depth] = $i_last_nonblank_token;
19603                 next;
19604             } ## end if ( $type eq '=>' )
19605
19606             elsif ( $type eq '.' ) {
19607                 $last_dot_index[$depth] = $i;
19608             }
19609
19610             # Turn off alignment if we are sure that this is not a list
19611             # environment.  To be safe, we will do this if we see certain
19612             # non-list tokens, such as ';', and also the environment is
19613             # not a list.  Note that '=' could be in any of the = operators
19614             # (lextest.t). We can't just use the reported environment
19615             # because it can be incorrect in some cases.
19616             elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
19617                 && !$self->is_in_list_by_i($i) )
19618             {
19619                 $dont_align[$depth]         = 1;
19620                 $want_comma_break[$depth]   = 0;
19621                 $index_before_arrow[$depth] = -1;
19622             } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
19623
19624             # now just handle any commas
19625             next unless ( $type eq ',' );
19626
19627             $last_dot_index[$depth]   = undef;
19628             $last_comma_index[$depth] = $i;
19629
19630             # break here if this comma follows a '=>'
19631             # but not if there is a side comment after the comma
19632             if ( $want_comma_break[$depth] ) {
19633
19634                 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
19635                     if ($rOpts_comma_arrow_breakpoints) {
19636                         $want_comma_break[$depth] = 0;
19637                         next;
19638                     }
19639                 }
19640
19641                 $self->set_forced_breakpoint($i)
19642                   unless ( $next_nonblank_type eq '#' );
19643
19644                 # break before the previous token if it looks safe
19645                 # Example of something that we will not try to break before:
19646                 #   DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
19647                 # Also we don't want to break at a binary operator (like +):
19648                 # $c->createOval(
19649                 #    $x + $R, $y +
19650                 #    $R => $x - $R,
19651                 #    $y - $R, -fill   => 'black',
19652                 # );
19653                 my $ibreak = $index_before_arrow[$depth] - 1;
19654                 if (   $ibreak > 0
19655                     && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
19656                 {
19657                     if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
19658                     if ( $types_to_go[$ibreak] eq 'b' )  { $ibreak-- }
19659                     if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
19660
19661                         # don't break pointer calls, such as the following:
19662                         #  File::Spec->curdir  => 1,
19663                         # (This is tokenized as adjacent 'w' tokens)
19664                         ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
19665
19666                         # And don't break before a comma, as in the following:
19667                         # ( LONGER_THAN,=> 1,
19668                         #    EIGHTY_CHARACTERS,=> 2,
19669                         #    CAUSES_FORMATTING,=> 3,
19670                         #    LIKE_THIS,=> 4,
19671                         # );
19672                         # This example is for -tso but should be general rule
19673                         if (   $tokens_to_go[ $ibreak + 1 ] ne '->'
19674                             && $tokens_to_go[ $ibreak + 1 ] ne ',' )
19675                         {
19676                             $self->set_forced_breakpoint($ibreak);
19677                         }
19678                     } ## end if ( $types_to_go[$ibreak...])
19679                 } ## end if ( $ibreak > 0 && $tokens_to_go...)
19680
19681                 $want_comma_break[$depth]   = 0;
19682                 $index_before_arrow[$depth] = -1;
19683
19684                 # handle list which mixes '=>'s and ','s:
19685                 # treat any list items so far as an interrupted list
19686                 $interrupted_list[$depth] = 1;
19687                 next;
19688             } ## end if ( $want_comma_break...)
19689
19690             # Break after all commas above starting depth...
19691             # But only if the last closing token was followed by a comma,
19692             #   to avoid breaking a list operator (issue c119)
19693             if (   $depth < $starting_depth
19694                 && $comma_follows_last_closing_token
19695                 && !$dont_align[$depth] )
19696             {
19697                 $self->set_forced_breakpoint($i)
19698                   unless ( $next_nonblank_type eq '#' );
19699                 next;
19700             }
19701
19702             # add this comma to the list..
19703             my $item_count = $item_count_stack[$depth];
19704             if ( $item_count == 0 ) {
19705
19706                 # but do not form a list with no opening structure
19707                 # for example:
19708
19709                 #            open INFILE_COPY, ">$input_file_copy"
19710                 #              or die ("very long message");
19711                 if ( ( $opening_structure_index_stack[$depth] < 0 )
19712                     && $self->is_in_block_by_i($i) )
19713                 {
19714                     $dont_align[$depth] = 1;
19715                 }
19716             } ## end if ( $item_count == 0 )
19717
19718             $comma_index[$depth][$item_count] = $i;
19719             ++$item_count_stack[$depth];
19720             if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
19721                 $identifier_count_stack[$depth]++;
19722             }
19723         } ## end while ( ++$i <= $max_index_to_go)
19724
19725         #-------------------------------------------
19726         # end of loop over all tokens in this batch
19727         #-------------------------------------------
19728
19729         # set breaks for any unfinished lists ..
19730         foreach my $dd ( reverse( $minimum_depth .. $current_depth ) ) {
19731
19732             $interrupted_list[$dd]   = 1;
19733             $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
19734             $self->set_comma_breakpoints( $dd, $rbond_strength_bias );
19735             $self->set_logical_breakpoints($dd)
19736               if ( $has_old_logical_breakpoints[$dd] );
19737             $self->set_for_semicolon_breakpoints($dd);
19738
19739             # break open container...
19740             my $i_opening = $opening_structure_index_stack[$dd];
19741             if ( defined($i_opening) && $i_opening >= 0 ) {
19742                 $self->set_forced_breakpoint($i_opening)
19743                   unless (
19744                     is_unbreakable_container($dd)
19745
19746                     # Avoid a break which would place an isolated ' or "
19747                     # on a line
19748                     || (   $type eq 'Q'
19749                         && $i_opening >= $max_index_to_go - 2
19750                         && ( $token eq "'" || $token eq '"' ) )
19751                   );
19752             }
19753         } ## end for ( my $dd = $current_depth...)
19754
19755         # Return a flag indicating if the input file had some good breakpoints.
19756         # This flag will be used to force a break in a line shorter than the
19757         # allowed line length.
19758         if ( $has_old_logical_breakpoints[$current_depth] ) {
19759             $saw_good_breakpoint = 1;
19760         }
19761
19762         # A complex line with one break at an = has a good breakpoint.
19763         # This is not complex ($total_depth_variation=0):
19764         # $res1
19765         #   = 10;
19766         #
19767         # This is complex ($total_depth_variation=6):
19768         # $res2 =
19769         #  (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
19770
19771         # The check ($i_old_.. < $max_index_to_go) was added to fix b1333
19772         elsif ($i_old_assignment_break
19773             && $total_depth_variation > 4
19774             && $old_breakpoint_count == 1
19775             && $i_old_assignment_break < $max_index_to_go )
19776         {
19777             $saw_good_breakpoint = 1;
19778         } ## end elsif ( $i_old_assignment_break...)
19779
19780         return $saw_good_breakpoint;
19781     } ## end sub break_lists
19782 } ## end closure break_lists
19783
19784 my %is_kwiZ;
19785 my %is_key_type;
19786
19787 BEGIN {
19788
19789     # Added 'w' to fix b1172
19790     my @q = qw(k w i Z ->);
19791     @is_kwiZ{@q} = (1) x scalar(@q);
19792
19793     # added = for b1211
19794     @q = qw<( [ { L R } ] ) = b>;
19795     push @q, ',';
19796     @is_key_type{@q} = (1) x scalar(@q);
19797 }
19798
19799 use constant DEBUG_FIND_START => 0;
19800
19801 sub find_token_starting_list {
19802
19803     # When testing to see if a block will fit on one line, some
19804     # previous token(s) may also need to be on the line; particularly
19805     # if this is a sub call.  So we will look back at least one
19806     # token.
19807     my ( $self, $i_opening_paren ) = @_;
19808
19809     # This will be the return index
19810     my $i_opening_minus = $i_opening_paren;
19811
19812     goto RETURN if ( $i_opening_minus <= 0 );
19813
19814     my $im1 = $i_opening_paren - 1;
19815     my ( $iprev_nb, $type_prev_nb ) = ( $im1, $types_to_go[$im1] );
19816     if ( $type_prev_nb eq 'b' && $iprev_nb > 0 ) {
19817         $iprev_nb -= 1;
19818         $type_prev_nb = $types_to_go[$iprev_nb];
19819     }
19820
19821     if ( $type_prev_nb eq ',' ) {
19822
19823         # a previous comma is a good break point
19824         # $i_opening_minus = $i_opening_paren;
19825     }
19826
19827     elsif (
19828         $tokens_to_go[$i_opening_paren] eq '('
19829
19830         # non-parens added here to fix case b1186
19831         || $is_kwiZ{$type_prev_nb}
19832       )
19833     {
19834         $i_opening_minus = $im1;
19835
19836         # Walk back to improve length estimate...
19837         # FIX for cases b1169 b1170 b1171: start walking back
19838         # at the previous nonblank. This makes the result insensitive
19839         # to the flag --space-function-paren, and similar.
19840         # previous loop: for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
19841         foreach my $j ( reverse( 0 .. $iprev_nb ) ) {
19842             if ( $is_key_type{ $types_to_go[$j] } ) {
19843
19844                 # fix for b1211
19845                 if ( $types_to_go[$j] eq '=' ) { $i_opening_minus = $j }
19846                 last;
19847             }
19848             $i_opening_minus = $j;
19849         }
19850         if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
19851     }
19852
19853   RETURN:
19854
19855     DEBUG_FIND_START && print <<EOM;
19856 FIND_START: i=$i_opening_paren tok=$tokens_to_go[$i_opening_paren] => im=$i_opening_minus tok=$tokens_to_go[$i_opening_minus]
19857 EOM
19858
19859     return $i_opening_minus;
19860 } ## end sub find_token_starting_list
19861
19862 {    ## begin closure set_comma_breakpoints_do
19863
19864     my %is_keyword_with_special_leading_term;
19865
19866     BEGIN {
19867
19868         # These keywords have prototypes which allow a special leading item
19869         # followed by a list
19870         my @q =
19871           qw(formline grep kill map printf sprintf push chmod join pack unshift);
19872         @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
19873     }
19874
19875     use constant DEBUG_SPARSE => 0;
19876
19877     sub set_comma_breakpoints_do {
19878
19879         # Given a list with some commas, set breakpoints at some of the
19880         # commas, if necessary, to make it easy to read.
19881
19882         my ( $self, $rinput_hash ) = @_;
19883
19884         my $depth               = $rinput_hash->{depth};
19885         my $i_opening_paren     = $rinput_hash->{i_opening_paren};
19886         my $i_closing_paren     = $rinput_hash->{i_closing_paren};
19887         my $item_count          = $rinput_hash->{item_count};
19888         my $identifier_count    = $rinput_hash->{identifier_count};
19889         my $rcomma_index        = $rinput_hash->{rcomma_index};
19890         my $next_nonblank_type  = $rinput_hash->{next_nonblank_type};
19891         my $list_type           = $rinput_hash->{list_type};
19892         my $interrupted         = $rinput_hash->{interrupted};
19893         my $rdo_not_break_apart = $rinput_hash->{rdo_not_break_apart};
19894         my $must_break_open     = $rinput_hash->{must_break_open};
19895         my $has_broken_sublist  = $rinput_hash->{has_broken_sublist};
19896
19897         # nothing to do if no commas seen
19898         return if ( $item_count < 1 );
19899
19900         my $i_first_comma     = $rcomma_index->[0];
19901         my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
19902         my $i_last_comma      = $i_true_last_comma;
19903         if ( $i_last_comma >= $max_index_to_go ) {
19904             $i_last_comma = $rcomma_index->[ --$item_count - 1 ];
19905             return if ( $item_count < 1 );
19906         }
19907         my $is_lp_formatting = ref( $leading_spaces_to_go[$i_first_comma] );
19908
19909         #---------------------------------------------------------------
19910         # find lengths of all items in the list to calculate page layout
19911         #---------------------------------------------------------------
19912         my $comma_count = $item_count;
19913         my @item_lengths;
19914         my @i_term_begin;
19915         my @i_term_end;
19916         my @i_term_comma;
19917         my $i_prev_plus;
19918         my @max_length = ( 0, 0 );
19919         my $first_term_length;
19920         my $i      = $i_opening_paren;
19921         my $is_odd = 1;
19922
19923         foreach my $j ( 0 .. $comma_count - 1 ) {
19924             $is_odd      = 1 - $is_odd;
19925             $i_prev_plus = $i + 1;
19926             $i           = $rcomma_index->[$j];
19927
19928             my $i_term_end =
19929               ( $i == 0 || $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
19930             my $i_term_begin =
19931               ( $types_to_go[$i_prev_plus] eq 'b' )
19932               ? $i_prev_plus + 1
19933               : $i_prev_plus;
19934             push @i_term_begin, $i_term_begin;
19935             push @i_term_end,   $i_term_end;
19936             push @i_term_comma, $i;
19937
19938             # note: currently adding 2 to all lengths (for comma and space)
19939             my $length =
19940               2 + token_sequence_length( $i_term_begin, $i_term_end );
19941             push @item_lengths, $length;
19942
19943             if ( $j == 0 ) {
19944                 $first_term_length = $length;
19945             }
19946             else {
19947
19948                 if ( $length > $max_length[$is_odd] ) {
19949                     $max_length[$is_odd] = $length;
19950                 }
19951             }
19952         }
19953
19954         # now we have to make a distinction between the comma count and item
19955         # count, because the item count will be one greater than the comma
19956         # count if the last item is not terminated with a comma
19957         my $i_b =
19958           ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
19959           ? $i_last_comma + 1
19960           : $i_last_comma;
19961         my $i_e =
19962           ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
19963           ? $i_closing_paren - 2
19964           : $i_closing_paren - 1;
19965         my $i_effective_last_comma = $i_last_comma;
19966
19967         my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
19968
19969         if ( $last_item_length > 0 ) {
19970
19971             # add 2 to length because other lengths include a comma and a blank
19972             $last_item_length += 2;
19973             push @item_lengths, $last_item_length;
19974             push @i_term_begin, $i_b + 1;
19975             push @i_term_end,   $i_e;
19976             push @i_term_comma, undef;
19977
19978             my $i_odd = $item_count % 2;
19979
19980             if ( $last_item_length > $max_length[$i_odd] ) {
19981                 $max_length[$i_odd] = $last_item_length;
19982             }
19983
19984             $item_count++;
19985             $i_effective_last_comma = $i_e + 1;
19986
19987             if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
19988                 $identifier_count++;
19989             }
19990         }
19991
19992         #---------------------------------------------------------------
19993         # End of length calculations
19994         #---------------------------------------------------------------
19995
19996         #---------------------------------------------------------------
19997         # Compound List Rule 1:
19998         # Break at (almost) every comma for a list containing a broken
19999         # sublist.  This has higher priority than the Interrupted List
20000         # Rule.
20001         #---------------------------------------------------------------
20002         if ($has_broken_sublist) {
20003
20004             # Break at every comma except for a comma between two
20005             # simple, small terms.  This prevents long vertical
20006             # columns of, say, just 0's.
20007             my $small_length = 10;    # 2 + actual maximum length wanted
20008
20009             # We'll insert a break in long runs of small terms to
20010             # allow alignment in uniform tables.
20011             my $skipped_count = 0;
20012             my $columns       = table_columns_available($i_first_comma);
20013             my $fields        = int( $columns / $small_length );
20014             if (   $rOpts_maximum_fields_per_table
20015                 && $fields > $rOpts_maximum_fields_per_table )
20016             {
20017                 $fields = $rOpts_maximum_fields_per_table;
20018             }
20019             my $max_skipped_count = $fields - 1;
20020
20021             my $is_simple_last_term = 0;
20022             my $is_simple_next_term = 0;
20023             foreach my $j ( 0 .. $item_count ) {
20024                 $is_simple_last_term = $is_simple_next_term;
20025                 $is_simple_next_term = 0;
20026                 if (   $j < $item_count
20027                     && $i_term_end[$j] == $i_term_begin[$j]
20028                     && $item_lengths[$j] <= $small_length )
20029                 {
20030                     $is_simple_next_term = 1;
20031                 }
20032                 next if $j == 0;
20033                 if (   $is_simple_last_term
20034                     && $is_simple_next_term
20035                     && $skipped_count < $max_skipped_count )
20036                 {
20037                     $skipped_count++;
20038                 }
20039                 else {
20040                     $skipped_count = 0;
20041                     my $i_tc = $i_term_comma[ $j - 1 ];
20042                     last unless defined $i_tc;
20043                     $self->set_forced_breakpoint($i_tc);
20044                 }
20045             }
20046
20047             # always break at the last comma if this list is
20048             # interrupted; we wouldn't want to leave a terminal '{', for
20049             # example.
20050             if ($interrupted) {
20051                 $self->set_forced_breakpoint($i_true_last_comma);
20052             }
20053             return;
20054         }
20055
20056 #my ( $a, $b, $c ) = caller();
20057 #print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
20058 #i_first = $i_first_comma  i_last=$i_last_comma max=$max_index_to_go\n";
20059 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
20060
20061         #---------------------------------------------------------------
20062         # Interrupted List Rule:
20063         # A list is forced to use old breakpoints if it was interrupted
20064         # by side comments or blank lines, or requested by user.
20065         #---------------------------------------------------------------
20066         if (   $rOpts_break_at_old_comma_breakpoints
20067             || $interrupted
20068             || $i_opening_paren < 0 )
20069         {
20070             $self->copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
20071             return;
20072         }
20073
20074         #---------------------------------------------------------------
20075         # Looks like a list of items.  We have to look at it and size it up.
20076         #---------------------------------------------------------------
20077
20078         my $opening_token       = $tokens_to_go[$i_opening_paren];
20079         my $opening_is_in_block = $self->is_in_block_by_i($i_opening_paren);
20080
20081         #-------------------------------------------------------------------
20082         # Return if this will fit on one line
20083         #-------------------------------------------------------------------
20084
20085         # The -bbxi=2 parameters can add an extra hidden level of indentation;
20086         # this needs a tolerance to avoid instability.  Fixes b1259, 1260.
20087         my $tol = 0;
20088         if (   $break_before_container_types{$opening_token}
20089             && $container_indentation_options{$opening_token}
20090             && $container_indentation_options{$opening_token} == 2 )
20091         {
20092             $tol = $rOpts_indent_columns;
20093
20094             # use greater of -ci and -i (fix for case b1334)
20095             if ( $tol < $rOpts_continuation_indentation ) {
20096                 $tol = $rOpts_continuation_indentation;
20097             }
20098         }
20099
20100         my $i_opening_minus = $self->find_token_starting_list($i_opening_paren);
20101         return
20102           unless $self->excess_line_length( $i_opening_minus, $i_closing_paren )
20103           + $tol > 0;
20104
20105         #-------------------------------------------------------------------
20106         # Now we know that this block spans multiple lines; we have to set
20107         # at least one breakpoint -- real or fake -- as a signal to break
20108         # open any outer containers.
20109         #-------------------------------------------------------------------
20110         set_fake_breakpoint();
20111
20112         # be sure we do not extend beyond the current list length
20113         if ( $i_effective_last_comma >= $max_index_to_go ) {
20114             $i_effective_last_comma = $max_index_to_go - 1;
20115         }
20116
20117         # Set a flag indicating if we need to break open to keep -lp
20118         # items aligned.  This is necessary if any of the list terms
20119         # exceeds the available space after the '('.
20120         my $need_lp_break_open = $must_break_open;
20121         if ( $is_lp_formatting && !$must_break_open ) {
20122             my $columns_if_unbroken =
20123               $maximum_line_length_at_level[ $levels_to_go[$i_opening_minus] ]
20124               - total_line_length( $i_opening_minus, $i_opening_paren );
20125             $need_lp_break_open =
20126                  ( $max_length[0] > $columns_if_unbroken )
20127               || ( $max_length[1] > $columns_if_unbroken )
20128               || ( $first_term_length > $columns_if_unbroken );
20129         }
20130
20131         # Specify if the list must have an even number of fields or not.
20132         # It is generally safest to assume an even number, because the
20133         # list items might be a hash list.  But if we can be sure that
20134         # it is not a hash, then we can allow an odd number for more
20135         # flexibility.
20136         my $odd_or_even = 2;    # 1 = odd field count ok, 2 = want even count
20137
20138         if (   $identifier_count >= $item_count - 1
20139             || $is_assignment{$next_nonblank_type}
20140             || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
20141           )
20142         {
20143             $odd_or_even = 1;
20144         }
20145
20146         # do we have a long first term which should be
20147         # left on a line by itself?
20148         my $use_separate_first_term = (
20149             $odd_or_even == 1           # only if we can use 1 field/line
20150               && $item_count > 3        # need several items
20151               && $first_term_length >
20152               2 * $max_length[0] - 2    # need long first term
20153               && $first_term_length >
20154               2 * $max_length[1] - 2    # need long first term
20155         );
20156
20157         # or do we know from the type of list that the first term should
20158         # be placed alone?
20159         if ( !$use_separate_first_term ) {
20160             if ( $is_keyword_with_special_leading_term{$list_type} ) {
20161                 $use_separate_first_term = 1;
20162
20163                 # should the container be broken open?
20164                 if ( $item_count < 3 ) {
20165                     if ( $i_first_comma - $i_opening_paren < 4 ) {
20166                         ${$rdo_not_break_apart} = 1;
20167                     }
20168                 }
20169                 elsif ($first_term_length < 20
20170                     && $i_first_comma - $i_opening_paren < 4 )
20171                 {
20172                     my $columns = table_columns_available($i_first_comma);
20173                     if ( $first_term_length < $columns ) {
20174                         ${$rdo_not_break_apart} = 1;
20175                     }
20176                 }
20177             }
20178         }
20179
20180         # if so,
20181         if ($use_separate_first_term) {
20182
20183             # ..set a break and update starting values
20184             $use_separate_first_term = 1;
20185             $self->set_forced_breakpoint($i_first_comma);
20186             $i_opening_paren = $i_first_comma;
20187             $i_first_comma   = $rcomma_index->[1];
20188             $item_count--;
20189             return if $comma_count == 1;
20190             shift @item_lengths;
20191             shift @i_term_begin;
20192             shift @i_term_end;
20193             shift @i_term_comma;
20194         }
20195
20196         # if not, update the metrics to include the first term
20197         else {
20198             if ( $first_term_length > $max_length[0] ) {
20199                 $max_length[0] = $first_term_length;
20200             }
20201         }
20202
20203         # Field width parameters
20204         my $pair_width = ( $max_length[0] + $max_length[1] );
20205         my $max_width =
20206           ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
20207
20208         # Number of free columns across the page width for laying out tables
20209         my $columns = table_columns_available($i_first_comma);
20210
20211         # Patch for b1210 and b1216-b1218 when -vmll is set.  If we are unable
20212         # to break after an opening paren, then the maximum line length for the
20213         # first line could be less than the later lines.  So we need to reduce
20214         # the line length.  Normally, we will get a break after an opening
20215         # paren, but in some cases we might not.
20216         if (   $rOpts_variable_maximum_line_length
20217             && $tokens_to_go[$i_opening_paren] eq '('
20218             && @i_term_begin )
20219           ##&& !$old_breakpoint_to_go[$i_opening_paren] )  ## in b1210 patch
20220         {
20221             my $ib   = $i_term_begin[0];
20222             my $type = $types_to_go[$ib];
20223
20224             # So far, the only known instance of this problem is when
20225             # a bareword follows an opening paren with -vmll
20226             if ( $type eq 'w' ) {
20227
20228                 # If a line starts with paren+space+terms, then its max length
20229                 # could be up to ci+2-i spaces less than if the term went out
20230                 # on a line after the paren.  So..
20231                 my $tol_w = max( 0,
20232                     2 + $rOpts_continuation_indentation -
20233                       $rOpts_indent_columns );
20234                 $columns = max( 0, $columns - $tol_w );
20235
20236                 ## Here is the original b1210 fix, but it failed on b1216-b1218
20237                 ##my $columns2 = table_columns_available($i_opening_paren);
20238                 ##$columns = min( $columns, $columns2 );
20239             }
20240         }
20241
20242         # Estimated maximum number of fields which fit this space
20243         # This will be our first guess
20244         my $number_of_fields_max =
20245           maximum_number_of_fields( $columns, $odd_or_even, $max_width,
20246             $pair_width );
20247         my $number_of_fields = $number_of_fields_max;
20248
20249         # Find the best-looking number of fields
20250         # and make this our second guess if possible
20251         my ( $number_of_fields_best, $ri_ragged_break_list,
20252             $new_identifier_count )
20253           = $self->study_list_complexity( \@i_term_begin, \@i_term_end,
20254             \@item_lengths, $max_width );
20255
20256         if (   $number_of_fields_best != 0
20257             && $number_of_fields_best < $number_of_fields_max )
20258         {
20259             $number_of_fields = $number_of_fields_best;
20260         }
20261
20262         # ----------------------------------------------------------------------
20263         # If we are crowded and the -lp option is being used, try to
20264         # undo some indentation
20265         # ----------------------------------------------------------------------
20266         if (
20267             $is_lp_formatting
20268             && (
20269                 $number_of_fields == 0
20270                 || (   $number_of_fields == 1
20271                     && $number_of_fields != $number_of_fields_best )
20272             )
20273           )
20274         {
20275             my $available_spaces =
20276               $self->get_available_spaces_to_go($i_first_comma);
20277             if ( $available_spaces > 0 ) {
20278
20279                 my $spaces_wanted = $max_width - $columns;    # for 1 field
20280
20281                 if ( $number_of_fields_best == 0 ) {
20282                     $number_of_fields_best =
20283                       get_maximum_fields_wanted( \@item_lengths );
20284                 }
20285
20286                 if ( $number_of_fields_best != 1 ) {
20287                     my $spaces_wanted_2 =
20288                       1 + $pair_width - $columns;    # for 2 fields
20289                     if ( $available_spaces > $spaces_wanted_2 ) {
20290                         $spaces_wanted = $spaces_wanted_2;
20291                     }
20292                 }
20293
20294                 if ( $spaces_wanted > 0 ) {
20295                     my $deleted_spaces =
20296                       $self->reduce_lp_indentation( $i_first_comma,
20297                         $spaces_wanted );
20298
20299                     # redo the math
20300                     if ( $deleted_spaces > 0 ) {
20301                         $columns = table_columns_available($i_first_comma);
20302                         $number_of_fields_max =
20303                           maximum_number_of_fields( $columns, $odd_or_even,
20304                             $max_width, $pair_width );
20305                         $number_of_fields = $number_of_fields_max;
20306
20307                         if (   $number_of_fields_best == 1
20308                             && $number_of_fields >= 1 )
20309                         {
20310                             $number_of_fields = $number_of_fields_best;
20311                         }
20312                     }
20313                 }
20314             }
20315         }
20316
20317         # try for one column if two won't work
20318         if ( $number_of_fields <= 0 ) {
20319             $number_of_fields = int( $columns / $max_width );
20320         }
20321
20322         # The user can place an upper bound on the number of fields,
20323         # which can be useful for doing maintenance on tables
20324         if (   $rOpts_maximum_fields_per_table
20325             && $number_of_fields > $rOpts_maximum_fields_per_table )
20326         {
20327             $number_of_fields = $rOpts_maximum_fields_per_table;
20328         }
20329
20330         # How many columns (characters) and lines would this container take
20331         # if no additional whitespace were added?
20332         my $packed_columns = token_sequence_length( $i_opening_paren + 1,
20333             $i_effective_last_comma + 1 );
20334         if ( $columns <= 0 ) { $columns = 1 }    # avoid divide by zero
20335         my $packed_lines = 1 + int( $packed_columns / $columns );
20336
20337         # are we an item contained in an outer list?
20338         my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
20339
20340         if ( $number_of_fields <= 0 ) {
20341
20342 #         #---------------------------------------------------------------
20343 #         # We're in trouble.  We can't find a single field width that works.
20344 #         # There is no simple answer here; we may have a single long list
20345 #         # item, or many.
20346 #         #---------------------------------------------------------------
20347 #
20348 #         In many cases, it may be best to not force a break if there is just one
20349 #         comma, because the standard continuation break logic will do a better
20350 #         job without it.
20351 #
20352 #         In the common case that all but one of the terms can fit
20353 #         on a single line, it may look better not to break open the
20354 #         containing parens.  Consider, for example
20355 #
20356 #             $color =
20357 #               join ( '/',
20358 #                 sort { $color_value{$::a} <=> $color_value{$::b}; }
20359 #                 keys %colors );
20360 #
20361 #         which will look like this with the container broken:
20362 #
20363 #             $color = join (
20364 #                 '/',
20365 #                 sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
20366 #             );
20367 #
20368 #         Here is an example of this rule for a long last term:
20369 #
20370 #             log_message( 0, 256, 128,
20371 #                 "Number of routes in adj-RIB-in to be considered: $peercount" );
20372 #
20373 #         And here is an example with a long first term:
20374 #
20375 #         $s = sprintf(
20376 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
20377 #             $r, $pu, $ps, $cu, $cs, $tt
20378 #           )
20379 #           if $style eq 'all';
20380
20381             $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
20382
20383             my $long_last_term =
20384               $self->excess_line_length( 0, $i_last_comma ) <= 0;
20385             my $long_first_term =
20386               $self->excess_line_length( $i_first_comma + 1, $max_index_to_go )
20387               <= 0;
20388
20389             # break at every comma ...
20390             if (
20391
20392                 # if requested by user or is best looking
20393                 $number_of_fields_best == 1
20394
20395                 # or if this is a sublist of a larger list
20396                 || $in_hierarchical_list
20397
20398                 # or if multiple commas and we don't have a long first or last
20399                 # term
20400                 || ( $comma_count > 1
20401                     && !( $long_last_term || $long_first_term ) )
20402               )
20403             {
20404                 foreach ( 0 .. $comma_count - 1 ) {
20405                     $self->set_forced_breakpoint( $rcomma_index->[$_] );
20406                 }
20407             }
20408             elsif ($long_last_term) {
20409
20410                 $self->set_forced_breakpoint($i_last_comma);
20411                 ${$rdo_not_break_apart} = 1 unless $must_break_open;
20412             }
20413             elsif ($long_first_term) {
20414
20415                 $self->set_forced_breakpoint($i_first_comma);
20416             }
20417             else {
20418
20419                 # let breaks be defined by default bond strength logic
20420             }
20421             return;
20422         }
20423
20424         # --------------------------------------------------------
20425         # We have a tentative field count that seems to work.
20426         # How many lines will this require?
20427         # --------------------------------------------------------
20428         my $formatted_lines = $item_count / ($number_of_fields);
20429         if ( $formatted_lines != int $formatted_lines ) {
20430             $formatted_lines = 1 + int $formatted_lines;
20431         }
20432
20433         # So far we've been trying to fill out to the right margin.  But
20434         # compact tables are easier to read, so let's see if we can use fewer
20435         # fields without increasing the number of lines.
20436         $number_of_fields =
20437           compactify_table( $item_count, $number_of_fields, $formatted_lines,
20438             $odd_or_even );
20439
20440         # How many spaces across the page will we fill?
20441         my $columns_per_line =
20442           ( int $number_of_fields / 2 ) * $pair_width +
20443           ( $number_of_fields % 2 ) * $max_width;
20444
20445         my $formatted_columns;
20446
20447         if ( $number_of_fields > 1 ) {
20448             $formatted_columns =
20449               ( $pair_width * ( int( $item_count / 2 ) ) +
20450                   ( $item_count % 2 ) * $max_width );
20451         }
20452         else {
20453             $formatted_columns = $max_width * $item_count;
20454         }
20455         if ( $formatted_columns < $packed_columns ) {
20456             $formatted_columns = $packed_columns;
20457         }
20458
20459         my $unused_columns = $formatted_columns - $packed_columns;
20460
20461         # set some empirical parameters to help decide if we should try to
20462         # align; high sparsity does not look good, especially with few lines
20463         my $sparsity = ($unused_columns) / ($formatted_columns);
20464         my $max_allowed_sparsity =
20465             ( $item_count < 3 )    ? 0.1
20466           : ( $packed_lines == 1 ) ? 0.15
20467           : ( $packed_lines == 2 ) ? 0.4
20468           :                          0.7;
20469
20470         my $two_line_word_wrap_ok;
20471         if ( $opening_token eq '(' ) {
20472
20473             # default is to allow wrapping of short paren lists
20474             $two_line_word_wrap_ok = 1;
20475
20476             # but turn off word wrap where requested
20477             if ($rOpts_break_open_compact_parens) {
20478
20479                 # This parameter is a one-character flag, as follows:
20480                 #  '0' matches no parens  -> break open NOT OK -> word wrap OK
20481                 #  '1' matches all parens -> break open OK -> word wrap NOT OK
20482                 #  Other values are the same as used by the weld-exclusion-list
20483                 my $flag = $rOpts_break_open_compact_parens;
20484                 if (   $flag eq '*'
20485                     || $flag eq '1' )
20486                 {
20487                     $two_line_word_wrap_ok = 0;
20488                 }
20489                 elsif ( $flag eq '0' ) {
20490                     $two_line_word_wrap_ok = 1;
20491                 }
20492                 else {
20493                     my $KK = $K_to_go[$i_opening_paren];
20494                     $two_line_word_wrap_ok =
20495                       !$self->match_paren_flag( $KK, $flag );
20496                 }
20497             }
20498         }
20499
20500         # Begin check for shortcut methods, which avoid treating a list
20501         # as a table for relatively small parenthesized lists.  These
20502         # are usually easier to read if not formatted as tables.
20503         if (
20504             $packed_lines <= 2           # probably can fit in 2 lines
20505             && $item_count < 9           # doesn't have too many items
20506             && $opening_is_in_block      # not a sub-container
20507             && $two_line_word_wrap_ok    # ok to wrap this paren list
20508             ##&& $opening_token eq '('    # is paren list
20509           )
20510         {
20511
20512             # Shortcut method 1: for -lp and just one comma:
20513             # This is a no-brainer, just break at the comma.
20514             if (
20515                 $is_lp_formatting      # -lp
20516                 && $item_count == 2    # two items, one comma
20517                 && !$must_break_open
20518               )
20519             {
20520                 my $i_break = $rcomma_index->[0];
20521                 $self->set_forced_breakpoint($i_break);
20522                 ${$rdo_not_break_apart} = 1;
20523                 return;
20524
20525             }
20526
20527             # method 2 is for most small ragged lists which might look
20528             # best if not displayed as a table.
20529             if (
20530                 ( $number_of_fields == 2 && $item_count == 3 )
20531                 || (
20532                     $new_identifier_count > 0    # isn't all quotes
20533                     && $sparsity > 0.15
20534                 )    # would be fairly spaced gaps if aligned
20535               )
20536             {
20537
20538                 my $break_count = $self->set_ragged_breakpoints( \@i_term_comma,
20539                     $ri_ragged_break_list );
20540                 ++$break_count if ($use_separate_first_term);
20541
20542                 # NOTE: we should really use the true break count here,
20543                 # which can be greater if there are large terms and
20544                 # little space, but usually this will work well enough.
20545                 unless ($must_break_open) {
20546
20547                     if ( $break_count <= 1 ) {
20548                         ${$rdo_not_break_apart} = 1;
20549                     }
20550                     elsif ( $is_lp_formatting && !$need_lp_break_open ) {
20551                         ${$rdo_not_break_apart} = 1;
20552                     }
20553                 }
20554                 return;
20555             }
20556
20557         } ## end shortcut methods
20558
20559         # debug stuff
20560         DEBUG_SPARSE && do {
20561             print STDOUT
20562 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line  unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
20563
20564         };
20565
20566         #---------------------------------------------------------------
20567         # Compound List Rule 2:
20568         # If this list is too long for one line, and it is an item of a
20569         # larger list, then we must format it, regardless of sparsity
20570         # (ian.t).  One reason that we have to do this is to trigger
20571         # Compound List Rule 1, above, which causes breaks at all commas of
20572         # all outer lists.  In this way, the structure will be properly
20573         # displayed.
20574         #---------------------------------------------------------------
20575
20576         # Decide if this list is too long for one line unless broken
20577         my $total_columns = table_columns_available($i_opening_paren);
20578         my $too_long      = $packed_columns > $total_columns;
20579
20580         # For a paren list, include the length of the token just before the
20581         # '(' because this is likely a sub call, and we would have to
20582         # include the sub name on the same line as the list.  This is still
20583         # imprecise, but not too bad.  (steve.t)
20584         if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
20585
20586             $too_long = $self->excess_line_length( $i_opening_minus,
20587                 $i_effective_last_comma + 1 ) > 0;
20588         }
20589
20590         # FIXME: For an item after a '=>', try to include the length of the
20591         # thing before the '=>'.  This is crude and should be improved by
20592         # actually looking back token by token.
20593         if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
20594             my $i_opening_minus_test = $i_opening_paren - 4;
20595             if ( $i_opening_minus >= 0 ) {
20596                 $too_long = $self->excess_line_length( $i_opening_minus_test,
20597                     $i_effective_last_comma + 1 ) > 0;
20598             }
20599         }
20600
20601         # Always break lists contained in '[' and '{' if too long for 1 line,
20602         # and always break lists which are too long and part of a more complex
20603         # structure.
20604         my $must_break_open_container = $must_break_open
20605           || ( $too_long
20606             && ( $in_hierarchical_list || !$two_line_word_wrap_ok ) );
20607
20608 #print "LISTX: next=$next_nonblank_type  avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long  opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines  packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
20609
20610         #---------------------------------------------------------------
20611         # The main decision:
20612         # Now decide if we will align the data into aligned columns.  Do not
20613         # attempt to align columns if this is a tiny table or it would be
20614         # too spaced.  It seems that the more packed lines we have, the
20615         # sparser the list that can be allowed and still look ok.
20616         #---------------------------------------------------------------
20617
20618         if (   ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
20619             || ( $formatted_lines < 2 )
20620             || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
20621           )
20622         {
20623
20624             #---------------------------------------------------------------
20625             # too sparse: would look ugly if aligned in a table;
20626             #---------------------------------------------------------------
20627
20628             # use old breakpoints if this is a 'big' list
20629             if ( $packed_lines > 2 && $item_count > 10 ) {
20630                 write_logfile_entry("List sparse: using old breakpoints\n");
20631                 $self->copy_old_breakpoints( $i_first_comma, $i_last_comma );
20632             }
20633
20634             # let the continuation logic handle it if 2 lines
20635             else {
20636
20637                 my $break_count = $self->set_ragged_breakpoints( \@i_term_comma,
20638                     $ri_ragged_break_list );
20639                 ++$break_count if ($use_separate_first_term);
20640
20641                 unless ($must_break_open_container) {
20642                     if ( $break_count <= 1 ) {
20643                         ${$rdo_not_break_apart} = 1;
20644                     }
20645                     elsif ( $is_lp_formatting && !$need_lp_break_open ) {
20646                         ${$rdo_not_break_apart} = 1;
20647                     }
20648                 }
20649             }
20650             return;
20651         }
20652
20653         #---------------------------------------------------------------
20654         # go ahead and format as a table
20655         #---------------------------------------------------------------
20656         write_logfile_entry(
20657             "List: auto formatting with $number_of_fields fields/row\n");
20658
20659         my $j_first_break =
20660           $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
20661
20662         my $j = $j_first_break;
20663         while ( $j < $comma_count ) {
20664             my $i_comma = $rcomma_index->[$j];
20665             $self->set_forced_breakpoint($i_comma);
20666             $j += $number_of_fields;
20667         }
20668         return;
20669     } ## end sub set_comma_breakpoints_do
20670 } ## end closure set_comma_breakpoints_do
20671
20672 sub study_list_complexity {
20673
20674     # Look for complex tables which should be formatted with one term per line.
20675     # Returns the following:
20676     #
20677     #  \@i_ragged_break_list = list of good breakpoints to avoid lines
20678     #    which are hard to read
20679     #  $number_of_fields_best = suggested number of fields based on
20680     #    complexity; = 0 if any number may be used.
20681     #
20682     my ( $self, $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
20683     my $item_count            = @{$ri_term_begin};
20684     my $complex_item_count    = 0;
20685     my $number_of_fields_best = $rOpts_maximum_fields_per_table;
20686     my $i_max                 = @{$ritem_lengths} - 1;
20687     ##my @item_complexity;
20688
20689     my $i_last_last_break = -3;
20690     my $i_last_break      = -2;
20691     my @i_ragged_break_list;
20692
20693     my $definitely_complex = 30;
20694     my $definitely_simple  = 12;
20695     my $quote_count        = 0;
20696
20697     for my $i ( 0 .. $i_max ) {
20698         my $ib = $ri_term_begin->[$i];
20699         my $ie = $ri_term_end->[$i];
20700
20701         # define complexity: start with the actual term length
20702         my $weighted_length = ( $ritem_lengths->[$i] - 2 );
20703
20704         ##TBD: join types here and check for variations
20705         ##my $str=join "", @tokens_to_go[$ib..$ie];
20706
20707         my $is_quote = 0;
20708         if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
20709             $is_quote = 1;
20710             $quote_count++;
20711         }
20712         elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
20713             $quote_count++;
20714         }
20715
20716         if ( $ib eq $ie ) {
20717             if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
20718                 $complex_item_count++;
20719                 $weighted_length *= 2;
20720             }
20721             else {
20722             }
20723         }
20724         else {
20725             if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
20726                 $complex_item_count++;
20727                 $weighted_length *= 2;
20728             }
20729             if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
20730                 $weighted_length += 4;
20731             }
20732         }
20733
20734         # add weight for extra tokens.
20735         $weighted_length += 2 * ( $ie - $ib );
20736
20737 ##        my $BUB = join '', @tokens_to_go[$ib..$ie];
20738 ##        print "# COMPLEXITY:$weighted_length   $BUB\n";
20739
20740 ##push @item_complexity, $weighted_length;
20741
20742         # now mark a ragged break after this item it if it is 'long and
20743         # complex':
20744         if ( $weighted_length >= $definitely_complex ) {
20745
20746             # if we broke after the previous term
20747             # then break before it too
20748             if (   $i_last_break == $i - 1
20749                 && $i > 1
20750                 && $i_last_last_break != $i - 2 )
20751             {
20752
20753                 ## FIXME: don't strand a small term
20754                 pop @i_ragged_break_list;
20755                 push @i_ragged_break_list, $i - 2;
20756                 push @i_ragged_break_list, $i - 1;
20757             }
20758
20759             push @i_ragged_break_list, $i;
20760             $i_last_last_break = $i_last_break;
20761             $i_last_break      = $i;
20762         }
20763
20764         # don't break before a small last term -- it will
20765         # not look good on a line by itself.
20766         elsif ($i == $i_max
20767             && $i_last_break == $i - 1
20768             && $weighted_length <= $definitely_simple )
20769         {
20770             pop @i_ragged_break_list;
20771         }
20772     }
20773
20774     my $identifier_count = $i_max + 1 - $quote_count;
20775
20776     # Need more tuning here..
20777     if (   $max_width > 12
20778         && $complex_item_count > $item_count / 2
20779         && $number_of_fields_best != 2 )
20780     {
20781         $number_of_fields_best = 1;
20782     }
20783
20784     return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
20785 } ## end sub study_list_complexity
20786
20787 sub get_maximum_fields_wanted {
20788
20789     # Not all tables look good with more than one field of items.
20790     # This routine looks at a table and decides if it should be
20791     # formatted with just one field or not.
20792     # This coding is still under development.
20793     my ($ritem_lengths) = @_;
20794
20795     my $number_of_fields_best = 0;
20796
20797     # For just a few items, we tentatively assume just 1 field.
20798     my $item_count = @{$ritem_lengths};
20799     if ( $item_count <= 5 ) {
20800         $number_of_fields_best = 1;
20801     }
20802
20803     # For larger tables, look at it both ways and see what looks best
20804     else {
20805
20806         my $is_odd            = 1;
20807         my @max_length        = ( 0,     0 );
20808         my @last_length_2     = ( undef, undef );
20809         my @first_length_2    = ( undef, undef );
20810         my $last_length       = undef;
20811         my $total_variation_1 = 0;
20812         my $total_variation_2 = 0;
20813         my @total_variation_2 = ( 0, 0 );
20814
20815         foreach my $j ( 0 .. $item_count - 1 ) {
20816
20817             $is_odd = 1 - $is_odd;
20818             my $length = $ritem_lengths->[$j];
20819             if ( $length > $max_length[$is_odd] ) {
20820                 $max_length[$is_odd] = $length;
20821             }
20822
20823             if ( defined($last_length) ) {
20824                 my $dl = abs( $length - $last_length );
20825                 $total_variation_1 += $dl;
20826             }
20827             $last_length = $length;
20828
20829             my $ll = $last_length_2[$is_odd];
20830             if ( defined($ll) ) {
20831                 my $dl = abs( $length - $ll );
20832                 $total_variation_2[$is_odd] += $dl;
20833             }
20834             else {
20835                 $first_length_2[$is_odd] = $length;
20836             }
20837             $last_length_2[$is_odd] = $length;
20838         }
20839         $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
20840
20841         my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
20842         unless ( $total_variation_2 < $factor * $total_variation_1 ) {
20843             $number_of_fields_best = 1;
20844         }
20845     }
20846     return ($number_of_fields_best);
20847 } ## end sub get_maximum_fields_wanted
20848
20849 sub table_columns_available {
20850     my $i_first_comma = shift;
20851     my $columns =
20852       $maximum_line_length_at_level[ $levels_to_go[$i_first_comma] ] -
20853       leading_spaces_to_go($i_first_comma);
20854
20855     # Patch: the vertical formatter does not line up lines whose lengths
20856     # exactly equal the available line length because of allowances
20857     # that must be made for side comments.  Therefore, the number of
20858     # available columns is reduced by 1 character.
20859     $columns -= 1;
20860     return $columns;
20861 } ## end sub table_columns_available
20862
20863 sub maximum_number_of_fields {
20864
20865     # how many fields will fit in the available space?
20866     my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
20867     my $max_pairs        = int( $columns / $pair_width );
20868     my $number_of_fields = $max_pairs * 2;
20869     if (   $odd_or_even == 1
20870         && $max_pairs * $pair_width + $max_width <= $columns )
20871     {
20872         $number_of_fields++;
20873     }
20874     return $number_of_fields;
20875 } ## end sub maximum_number_of_fields
20876
20877 sub compactify_table {
20878
20879     # given a table with a certain number of fields and a certain number
20880     # of lines, see if reducing the number of fields will make it look
20881     # better.
20882     my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
20883     if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
20884
20885         my $min_fields = $number_of_fields;
20886
20887         while ($min_fields >= $odd_or_even
20888             && $min_fields * $formatted_lines >= $item_count )
20889         {
20890             $number_of_fields = $min_fields;
20891             $min_fields -= $odd_or_even;
20892         }
20893     }
20894     return $number_of_fields;
20895 } ## end sub compactify_table
20896
20897 sub set_ragged_breakpoints {
20898
20899     # Set breakpoints in a list that cannot be formatted nicely as a
20900     # table.
20901     my ( $self, $ri_term_comma, $ri_ragged_break_list ) = @_;
20902
20903     my $break_count = 0;
20904     foreach ( @{$ri_ragged_break_list} ) {
20905         my $j = $ri_term_comma->[$_];
20906         if ($j) {
20907             $self->set_forced_breakpoint($j);
20908             $break_count++;
20909         }
20910     }
20911     return $break_count;
20912 } ## end sub set_ragged_breakpoints
20913
20914 sub copy_old_breakpoints {
20915     my ( $self, $i_first_comma, $i_last_comma ) = @_;
20916     for my $i ( $i_first_comma .. $i_last_comma ) {
20917         if ( $old_breakpoint_to_go[$i] ) {
20918             $self->set_forced_breakpoint($i);
20919         }
20920     }
20921     return;
20922 }
20923
20924 sub set_nobreaks {
20925     my ( $self, $i, $j ) = @_;
20926     if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
20927
20928         0 && do {
20929             my ( $a, $b, $c ) = caller();
20930             print STDOUT
20931 "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
20932         };
20933
20934         @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
20935     }
20936
20937     # shouldn't happen; non-critical error
20938     else {
20939         0 && do {
20940             my ( $a, $b, $c ) = caller();
20941             print STDOUT
20942               "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
20943         };
20944     }
20945     return;
20946 } ## end sub set_nobreaks
20947
20948 ###############################################
20949 # CODE SECTION 12: Code for setting indentation
20950 ###############################################
20951
20952 sub token_sequence_length {
20953
20954     # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
20955     my ( $ibeg, $iend ) = @_;
20956
20957     # fix possible negative starting index
20958     if ( $ibeg < 0 ) { $ibeg = 0 }
20959
20960     # returns 0 if index range is empty (some subs assume this)
20961     if ( $ibeg > $iend ) {
20962         return 0;
20963     }
20964
20965     return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
20966 } ## end sub token_sequence_length
20967
20968 sub total_line_length {
20969
20970     # return length of a line of tokens ($ibeg .. $iend)
20971     my ( $ibeg, $iend ) = @_;
20972
20973     # Start with the leading spaces on this line ...
20974     my $length = $leading_spaces_to_go[$ibeg];
20975     if ( ref($length) ) { $length = $length->get_spaces() }
20976
20977     # ... then add the net token length
20978     $length +=
20979       $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
20980
20981     return $length;
20982 } ## end sub total_line_length
20983
20984 sub excess_line_length {
20985
20986     # return number of characters by which a line of tokens ($ibeg..$iend)
20987     # exceeds the allowable line length.
20988     # NOTE: profiling shows that efficiency of this routine is essential.
20989
20990     my ( $self, $ibeg, $iend, $ignore_right_weld ) = @_;
20991
20992     # Start with the leading spaces on this line ...
20993     my $excess = $leading_spaces_to_go[$ibeg];
20994     if ( ref($excess) ) { $excess = $excess->get_spaces() }
20995
20996     # ... then add the net token length, minus the maximum length
20997     $excess +=
20998       $summed_lengths_to_go[ $iend + 1 ] -
20999       $summed_lengths_to_go[$ibeg] -
21000       $maximum_line_length_at_level[ $levels_to_go[$ibeg] ];
21001
21002     # ... and include right weld lengths unless requested not to
21003     if (   $total_weld_count
21004         && $type_sequence_to_go[$iend]
21005         && !$ignore_right_weld )
21006     {
21007         my $wr = $self->[_rweld_len_right_at_K_]->{ $K_to_go[$iend] };
21008         $excess += $wr if defined($wr);
21009     }
21010
21011     return $excess;
21012 } ## end sub excess_line_length
21013
21014 sub get_spaces {
21015
21016     # return the number of leading spaces associated with an indentation
21017     # variable $indentation is either a constant number of spaces or an object
21018     # with a get_spaces method.
21019     my $indentation = shift;
21020     return ref($indentation) ? $indentation->get_spaces() : $indentation;
21021 }
21022
21023 sub get_recoverable_spaces {
21024
21025     # return the number of spaces (+ means shift right, - means shift left)
21026     # that we would like to shift a group of lines with the same indentation
21027     # to get them to line up with their opening parens
21028     my $indentation = shift;
21029     return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
21030 }
21031
21032 sub get_available_spaces_to_go {
21033
21034     my ( $self, $ii ) = @_;
21035     my $item = $leading_spaces_to_go[$ii];
21036
21037     # return the number of available leading spaces associated with an
21038     # indentation variable.  $indentation is either a constant number of
21039     # spaces or an object with a get_available_spaces method.
21040     return ref($item) ? $item->get_available_spaces() : 0;
21041 } ## end sub get_available_spaces_to_go
21042
21043 {    ## begin closure set_lp_indentation
21044
21045     use constant DEBUG_LP => 0;
21046
21047     # Stack of -lp index objects which survives between batches.
21048     my $rLP;
21049     my $max_lp_stack;
21050
21051     # The predicted position of the next opening container which may start
21052     # an -lp indentation level.  This survives between batches.
21053     my $lp_position_predictor;
21054
21055     # A level at which the lp format becomes too highly stressed to continue
21056     my $lp_cutoff_level;
21057
21058     BEGIN {
21059
21060         # Index names for the -lp stack variables.
21061         # Do not combine with other BEGIN blocks (c101).
21062
21063         my $i = 0;
21064         use constant {
21065             _lp_ci_level_        => $i++,
21066             _lp_level_           => $i++,
21067             _lp_object_          => $i++,
21068             _lp_container_seqno_ => $i++,
21069             _lp_space_count_     => $i++,
21070         };
21071     }
21072
21073     sub initialize_lp_vars {
21074
21075         # initialize gnu variables for a new file;
21076         # must be called once at the start of a new file.
21077
21078         $lp_position_predictor = 0;
21079         $max_lp_stack          = 0;
21080         $lp_cutoff_level = min( $stress_level_alpha, $stress_level_beta + 2 );
21081
21082         # we can turn off -lp if all levels will be at or above the cutoff
21083         if ( $lp_cutoff_level <= 1 ) {
21084             $rOpts_line_up_parentheses          = 0;
21085             $rOpts_extended_line_up_parentheses = 0;
21086         }
21087
21088         $rLP = [];
21089
21090         # initialize the leading whitespace stack to negative levels
21091         # so that we can never run off the end of the stack
21092         $rLP->[$max_lp_stack]->[_lp_ci_level_]        = -1;
21093         $rLP->[$max_lp_stack]->[_lp_level_]           = -1;
21094         $rLP->[$max_lp_stack]->[_lp_object_]          = undef;
21095         $rLP->[$max_lp_stack]->[_lp_container_seqno_] = SEQ_ROOT;
21096         $rLP->[$max_lp_stack]->[_lp_space_count_]     = 0;
21097
21098         return;
21099     } ## end sub initialize_lp_vars
21100
21101     # hashes for efficient testing
21102     my %hash_test1;
21103     my %hash_test2;
21104     my %hash_test3;
21105
21106     BEGIN {
21107         my @q = qw< } ) ] >;
21108         @hash_test1{@q} = (1) x scalar(@q);
21109         @q = qw(: ? f);
21110         push @q, ',';
21111         @hash_test2{@q} = (1) x scalar(@q);
21112         @q              = qw( . || && );
21113         @hash_test3{@q} = (1) x scalar(@q);
21114     }
21115
21116     sub set_lp_indentation {
21117
21118         #------------------------------------------------------------------
21119         # Define the leading whitespace for all tokens in the current batch
21120         # when the -lp formatting is selected.
21121         #------------------------------------------------------------------
21122
21123         my ($self) = @_;
21124
21125         return unless ($rOpts_line_up_parentheses);
21126         return unless ( defined($max_index_to_go) && $max_index_to_go >= 0 );
21127
21128         # List of -lp indentation objects created in this batch
21129         my $rlp_object_list    = [];
21130         my $max_lp_object_list = UNDEFINED_INDEX;
21131
21132         my %last_lp_equals;
21133         my %lp_comma_count;
21134         my %lp_arrow_count;
21135         my $ii_begin_line = 0;
21136
21137         my $rLL                       = $self->[_rLL_];
21138         my $Klimit                    = $self->[_Klimit_];
21139         my $rbreak_container          = $self->[_rbreak_container_];
21140         my $rshort_nested             = $self->[_rshort_nested_];
21141         my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
21142         my $rblock_type_of_seqno      = $self->[_rblock_type_of_seqno_];
21143         my $starting_in_quote   = $self->[_this_batch_]->[_starting_in_quote_];
21144         my $K_closing_container = $self->[_K_closing_container_];
21145         my $rlp_object_by_seqno = $self->[_rlp_object_by_seqno_];
21146         my $radjusted_levels    = $self->[_radjusted_levels_];
21147         my $rbreak_before_container_by_seqno =
21148           $self->[_rbreak_before_container_by_seqno_];
21149         my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
21150
21151         my $nws  = @{$radjusted_levels};
21152         my $imin = 0;
21153
21154         # The 'starting_in_quote' flag means that the first token is the first
21155         # token of a line and it is also the continuation of some kind of
21156         # multi-line quote or pattern.  It must have no added leading
21157         # whitespace, so we can skip it.
21158         if ($starting_in_quote) {
21159             $imin += 1;
21160         }
21161
21162         my $K_last_nonblank;
21163         my $Kpnb = $K_to_go[0] - 1;
21164         if ( $Kpnb > 0 && $rLL->[$Kpnb]->[_TYPE_] eq 'b' ) {
21165             $Kpnb -= 1;
21166         }
21167         if ( $Kpnb >= 0 && $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) {
21168             $K_last_nonblank = $Kpnb;
21169         }
21170
21171         my $last_nonblank_token     = EMPTY_STRING;
21172         my $last_nonblank_type      = EMPTY_STRING;
21173         my $last_last_nonblank_type = EMPTY_STRING;
21174
21175         if ( defined($K_last_nonblank) ) {
21176             $last_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
21177             $last_nonblank_type  = $rLL->[$K_last_nonblank]->[_TYPE_];
21178         }
21179
21180         my ( $space_count, $current_level, $current_ci_level, $in_lp_mode );
21181         my $stack_changed = 1;
21182
21183         #-----------------------------------
21184         # Loop over all tokens in this batch
21185         #-----------------------------------
21186         foreach my $ii ( $imin .. $max_index_to_go ) {
21187
21188             my $KK              = $K_to_go[$ii];
21189             my $type            = $types_to_go[$ii];
21190             my $token           = $tokens_to_go[$ii];
21191             my $level           = $levels_to_go[$ii];
21192             my $ci_level        = $ci_levels_to_go[$ii];
21193             my $total_depth     = $nesting_depth_to_go[$ii];
21194             my $standard_spaces = $leading_spaces_to_go[$ii];
21195
21196             #--------------------------------------------------
21197             # Adjust levels if necessary to recycle whitespace:
21198             #--------------------------------------------------
21199             if ( defined($radjusted_levels) && @{$radjusted_levels} == $Klimit )
21200             {
21201                 $level = $radjusted_levels->[$KK];
21202                 if ( $level < 0 ) { $level = 0 }  # note: this should not happen
21203             }
21204
21205             # get the top state from the stack if it has changed
21206             if ($stack_changed) {
21207                 my $rLP_top   = $rLP->[$max_lp_stack];
21208                 my $lp_object = $rLP_top->[_lp_object_];
21209                 if ($lp_object) {
21210                     ( $space_count, $current_level, $current_ci_level ) =
21211                       @{ $lp_object->get_spaces_level_ci() };
21212                 }
21213                 else {
21214                     $current_ci_level = $rLP_top->[_lp_ci_level_];
21215                     $current_level    = $rLP_top->[_lp_level_];
21216                     $space_count      = $rLP_top->[_lp_space_count_];
21217                 }
21218                 $stack_changed = 0;
21219             }
21220
21221             #------------------------------
21222             # update the position predictor
21223             #------------------------------
21224             if ( $type eq '{' || $type eq '(' ) {
21225
21226                 $lp_comma_count{ $total_depth + 1 } = 0;
21227                 $lp_arrow_count{ $total_depth + 1 } = 0;
21228
21229                 # If we come to an opening token after an '=' token of some
21230                 # type, see if it would be helpful to 'break' after the '=' to
21231                 # save space
21232                 my $last_equals = $last_lp_equals{$total_depth};
21233
21234                 # Skip an empty set of parens, such as after channel():
21235                 #   my $exchange = $self->_channel()->exchange(
21236                 # This fixes issues b1318 b1322 b1323 b1328
21237                 # TODO: maybe also skip parens with just one token?
21238                 my $is_empty_container;
21239                 if ( $last_equals && $ii < $max_index_to_go ) {
21240                     my $seqno    = $type_sequence_to_go[$ii];
21241                     my $inext_nb = $ii + 1;
21242                     $inext_nb++
21243                       if ( $types_to_go[$inext_nb] eq 'b' );
21244                     my $seqno_nb = $type_sequence_to_go[$inext_nb];
21245                     $is_empty_container =
21246                       $seqno && $seqno_nb && $seqno_nb == $seqno;
21247                 }
21248
21249                 if (   $last_equals
21250                     && $last_equals > $ii_begin_line
21251                     && !$is_empty_container )
21252                 {
21253
21254                     my $seqno = $type_sequence_to_go[$ii];
21255
21256                     # find the position if we break at the '='
21257                     my $i_test = $last_equals;
21258
21259                     # Fix for issue b1229, check for break before
21260                     if ( $want_break_before{ $types_to_go[$i_test] } ) {
21261                         if ( $i_test > 0 ) { $i_test-- }
21262                     }
21263                     elsif ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
21264
21265                     my $test_position = total_line_length( $i_test, $ii );
21266                     my $mll =
21267                       $maximum_line_length_at_level[ $levels_to_go[$i_test] ];
21268
21269                     #------------------------------------------------------
21270                     # Break if structure will reach the maximum line length
21271                     #------------------------------------------------------
21272
21273                     # Historically, -lp just used one-half line length here
21274                     my $len_increase = $rOpts_maximum_line_length / 2;
21275
21276                     # For -xlp, we can also use the pre-computed lengths
21277                     my $min_len = $rcollapsed_length_by_seqno->{$seqno};
21278                     if ( $min_len && $min_len > $len_increase ) {
21279                         $len_increase = $min_len;
21280                     }
21281
21282                     if (
21283
21284                         # if we might exceed the maximum line length
21285                         $lp_position_predictor + $len_increase > $mll
21286
21287                         # if a -bbx flag WANTS a break before this opening token
21288                         || (   $seqno
21289                             && $rbreak_before_container_by_seqno->{$seqno} )
21290
21291                         # or we are beyond the 1/4 point and there was an old
21292                         # break at an assignment (not '=>') [fix for b1035]
21293                         || (
21294                             $lp_position_predictor >
21295                             $mll - $rOpts_maximum_line_length * 3 / 4
21296                             && $types_to_go[$last_equals] ne '=>'
21297                             && (
21298                                 $old_breakpoint_to_go[$last_equals]
21299                                 || (   $last_equals > 0
21300                                     && $old_breakpoint_to_go[ $last_equals - 1 ]
21301                                 )
21302                                 || (   $last_equals > 1
21303                                     && $types_to_go[ $last_equals - 1 ] eq 'b'
21304                                     && $old_breakpoint_to_go[ $last_equals - 2 ]
21305                                 )
21306                             )
21307                         )
21308                       )
21309                     {
21310
21311                         # then make the switch -- note that we do not set a
21312                         # real breakpoint here because we may not really need
21313                         # one; sub break_lists will do that if necessary.
21314
21315                         my $Kc = $K_closing_container->{$seqno};
21316                         if (
21317
21318                             # For -lp, only if the closing token is in this
21319                             # batch (c117).  Otherwise it cannot be done by sub
21320                             # break_lists.
21321                             defined($Kc) && $Kc <= $K_to_go[$max_index_to_go]
21322
21323                             # For -xlp, we only need one nonblank token after
21324                             # the opening token.
21325                             || $rOpts_extended_line_up_parentheses
21326                           )
21327                         {
21328                             $ii_begin_line         = $i_test + 1;
21329                             $lp_position_predictor = $test_position;
21330
21331                             #--------------------------------------------------
21332                             # Fix for an opening container terminating a batch:
21333                             #--------------------------------------------------
21334                             # To get alignment of a -lp container with its
21335                             # contents, we have to put a break after $i_test.
21336                             # For $ii<$max_index_to_go, this will be done by
21337                             # sub break_lists based on the indentation object.
21338                             # But for $ii=$max_index_to_go, the indentation
21339                             # object for this seqno will not be created until
21340                             # the next batch, so we have to set a break at
21341                             # $i_test right now in order to get one.
21342                             if (   $ii == $max_index_to_go
21343                                 && !$block_type_to_go[$ii]
21344                                 && $type eq '{'
21345                                 && $seqno
21346                                 && !$ris_excluded_lp_container->{$seqno} )
21347                             {
21348                                 $self->set_forced_lp_break( $ii_begin_line,
21349                                     $ii );
21350                             }
21351                         }
21352                     }
21353                 }
21354             } ## end update position predictor
21355
21356             #------------------------
21357             # Handle decreasing depth
21358             #------------------------
21359             # Note that one token may have both decreasing and then increasing
21360             # depth. For example, (level, ci) can go from (1,1) to (2,0).  So,
21361             # in this example we would first go back to (1,0) then up to (2,0)
21362             # in a single call.
21363             if ( $level < $current_level || $ci_level < $current_ci_level ) {
21364
21365                 # loop to find the first entry at or completely below this level
21366                 while (1) {
21367                     if ($max_lp_stack) {
21368
21369                         # save index of token which closes this level
21370                         if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
21371                             my $lp_object =
21372                               $rLP->[$max_lp_stack]->[_lp_object_];
21373
21374                             $lp_object->set_closed($ii);
21375
21376                             my $comma_count = 0;
21377                             my $arrow_count = 0;
21378                             if ( $type eq '}' || $type eq ')' ) {
21379                                 $comma_count = $lp_comma_count{$total_depth};
21380                                 $arrow_count = $lp_arrow_count{$total_depth};
21381                                 $comma_count = 0 unless $comma_count;
21382                                 $arrow_count = 0 unless $arrow_count;
21383                             }
21384
21385                             $lp_object->set_comma_count($comma_count);
21386                             $lp_object->set_arrow_count($arrow_count);
21387
21388                             # Undo any extra indentation if we saw no commas
21389                             my $available_spaces =
21390                               $lp_object->get_available_spaces();
21391                             my $K_start = $lp_object->get_K_begin_line();
21392
21393                             if (   $available_spaces > 0
21394                                 && $K_start >= $K_to_go[0]
21395                                 && ( $comma_count <= 0 || $arrow_count > 0 ) )
21396                             {
21397
21398                                 my $i = $lp_object->get_lp_item_index();
21399
21400                                 # Safety check for a valid stack index. It
21401                                 # should be ok because we just checked that the
21402                                 # index K of the token associated with this
21403                                 # indentation is in this batch.
21404                                 if ( $i < 0 || $i > $max_lp_object_list ) {
21405                                     if (DEVEL_MODE) {
21406                                         my $lno = $rLL->[$KK]->[_LINE_INDEX_];
21407                                         Fault(<<EOM);
21408 Program bug with -lp near line $lno.  Stack index i=$i should be >=0 and <= max=$max_lp_object_list
21409 EOM
21410                                     }
21411                                 }
21412                                 else {
21413                                     if ( $arrow_count == 0 ) {
21414                                         $rlp_object_list->[$i]
21415                                           ->permanently_decrease_available_spaces
21416                                           ($available_spaces);
21417                                     }
21418                                     else {
21419                                         $rlp_object_list->[$i]
21420                                           ->tentatively_decrease_available_spaces
21421                                           ($available_spaces);
21422                                     }
21423                                     foreach
21424                                       my $j ( $i + 1 .. $max_lp_object_list )
21425                                     {
21426                                         $rlp_object_list->[$j]
21427                                           ->decrease_SPACES($available_spaces);
21428                                     }
21429                                 }
21430                             }
21431                         }
21432
21433                         # go down one level
21434                         --$max_lp_stack;
21435
21436                         my $rLP_top = $rLP->[$max_lp_stack];
21437                         my $ci_lev  = $rLP_top->[_lp_ci_level_];
21438                         my $lev     = $rLP_top->[_lp_level_];
21439                         my $spaces  = $rLP_top->[_lp_space_count_];
21440                         if ( $rLP_top->[_lp_object_] ) {
21441                             my $lp_obj = $rLP_top->[_lp_object_];
21442                             ( $spaces, $lev, $ci_lev ) =
21443                               @{ $lp_obj->get_spaces_level_ci() };
21444                         }
21445
21446                         # stop when we reach a level at or below the current
21447                         # level
21448                         if ( $lev <= $level && $ci_lev <= $ci_level ) {
21449                             $space_count      = $spaces;
21450                             $current_level    = $lev;
21451                             $current_ci_level = $ci_lev;
21452                             last;
21453                         }
21454                     }
21455
21456                     # reached bottom of stack .. should never happen because
21457                     # only negative levels can get here, and $level was forced
21458                     # to be positive above.
21459                     else {
21460
21461                         # non-fatal, keep going except in DEVEL_MODE
21462                         if (DEVEL_MODE) {
21463 ##program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp
21464                             Fault(<<EOM);
21465 program bug with -lp: stack_error. level=$level; ci_level=$ci_level; rerun with -nlp
21466 EOM
21467                         }
21468                         last;
21469                     }
21470                 }
21471             } ## end decreasing depth
21472
21473             #------------------------
21474             # handle increasing depth
21475             #------------------------
21476             if ( $level > $current_level || $ci_level > $current_ci_level ) {
21477
21478                 $stack_changed = 1;
21479
21480                 # Compute the standard incremental whitespace.  This will be
21481                 # the minimum incremental whitespace that will be used.  This
21482                 # choice results in a smooth transition between the gnu-style
21483                 # and the standard style.
21484                 my $standard_increment =
21485                   ( $level - $current_level ) *
21486                   $rOpts_indent_columns +
21487                   ( $ci_level - $current_ci_level ) *
21488                   $rOpts_continuation_indentation;
21489
21490                 # Now we have to define how much extra incremental space
21491                 # ("$available_space") we want.  This extra space will be
21492                 # reduced as necessary when long lines are encountered or when
21493                 # it becomes clear that we do not have a good list.
21494                 my $available_spaces = 0;
21495                 my $align_seqno      = 0;
21496
21497                 my $last_nonblank_seqno;
21498                 my $last_nonblank_block_type;
21499                 if ( defined($K_last_nonblank) ) {
21500                     $last_nonblank_seqno =
21501                       $rLL->[$K_last_nonblank]->[_TYPE_SEQUENCE_];
21502                     $last_nonblank_block_type =
21503                         $last_nonblank_seqno
21504                       ? $rblock_type_of_seqno->{$last_nonblank_seqno}
21505                       : undef;
21506                 }
21507
21508                 $in_lp_mode = $rLP->[$max_lp_stack]->[_lp_object_];
21509
21510                 #-----------------------------------------------
21511                 # Initialize indentation spaces on empty stack..
21512                 #-----------------------------------------------
21513                 if ( $max_lp_stack == 0 ) {
21514                     $space_count = $level * $rOpts_indent_columns;
21515                 }
21516
21517                 #----------------------------------------
21518                 # Add the standard space increment if ...
21519                 #----------------------------------------
21520                 elsif (
21521
21522                     # if this is a BLOCK, add the standard increment
21523                     $last_nonblank_block_type
21524
21525                     # or if this is not a sequenced item
21526                     || !$last_nonblank_seqno
21527
21528                     # or this container is excluded by user rules
21529                     # or contains here-docs or multiline qw text
21530                     || defined($last_nonblank_seqno)
21531                     && $ris_excluded_lp_container->{$last_nonblank_seqno}
21532
21533                     # or if last nonblank token was not structural indentation
21534                     || $last_nonblank_type ne '{'
21535
21536                     # and do not start -lp under stress .. fixes b1244, b1255
21537                     || !$in_lp_mode && $level >= $lp_cutoff_level
21538
21539                   )
21540                 {
21541
21542                     # If we have entered lp mode, use the top lp object to get
21543                     # the current indentation spaces because it may have
21544                     # changed.  Fixes b1285, b1286.
21545                     if ($in_lp_mode) {
21546                         $space_count = $in_lp_mode->get_spaces();
21547                     }
21548                     $space_count += $standard_increment;
21549                 }
21550
21551                 #---------------------------------------------------------------
21552                 # -lp mode: try to use space to the first non-blank level change
21553                 #---------------------------------------------------------------
21554                 else {
21555
21556                     # see how much space we have available
21557                     my $test_space_count = $lp_position_predictor;
21558                     my $excess           = 0;
21559                     my $min_len =
21560                       $rcollapsed_length_by_seqno->{$last_nonblank_seqno};
21561                     my $next_opening_too_far;
21562
21563                     if ( defined($min_len) ) {
21564                         $excess =
21565                           $test_space_count +
21566                           $min_len -
21567                           $maximum_line_length_at_level[$level];
21568                         if ( $excess > 0 ) {
21569                             $test_space_count -= $excess;
21570
21571                             # will the next opening token be a long way out?
21572                             $next_opening_too_far =
21573                               $lp_position_predictor + $excess >
21574                               $maximum_line_length_at_level[$level];
21575                         }
21576                     }
21577
21578                     my $rLP_top             = $rLP->[$max_lp_stack];
21579                     my $min_gnu_indentation = $rLP_top->[_lp_space_count_];
21580                     if ( $rLP_top->[_lp_object_] ) {
21581                         $min_gnu_indentation =
21582                           $rLP_top->[_lp_object_]->get_spaces();
21583                     }
21584                     $available_spaces =
21585                       $test_space_count - $min_gnu_indentation;
21586
21587                     # Do not startup -lp indentation mode if no space ...
21588                     # ... or if it puts the opening far to the right
21589                     if ( !$in_lp_mode
21590                         && ( $available_spaces <= 0 || $next_opening_too_far ) )
21591                     {
21592                         $space_count += $standard_increment;
21593                         $available_spaces = 0;
21594                     }
21595
21596                     # Use -lp mode
21597                     else {
21598                         $space_count = $test_space_count;
21599
21600                         $in_lp_mode = 1;
21601                         if ( $available_spaces >= $standard_increment ) {
21602                             $min_gnu_indentation += $standard_increment;
21603                         }
21604                         elsif ( $available_spaces > 1 ) {
21605                             $min_gnu_indentation += $available_spaces + 1;
21606                         }
21607                         ##elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
21608                         elsif ( $is_opening_token{$last_nonblank_token} ) {
21609                             if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
21610                                 $min_gnu_indentation += 2;
21611                             }
21612                             else {
21613                                 $min_gnu_indentation += 1;
21614                             }
21615                         }
21616                         else {
21617                             $min_gnu_indentation += $standard_increment;
21618                         }
21619                         $available_spaces = $space_count - $min_gnu_indentation;
21620
21621                         if ( $available_spaces < 0 ) {
21622                             $space_count      = $min_gnu_indentation;
21623                             $available_spaces = 0;
21624                         }
21625                         $align_seqno = $last_nonblank_seqno;
21626                     }
21627                 }
21628
21629                 #-------------------------------------------
21630                 # update the state, but not on a blank token
21631                 #-------------------------------------------
21632                 if ( $type ne 'b' ) {
21633
21634                     if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
21635                         $rLP->[$max_lp_stack]->[_lp_object_]->set_have_child(1);
21636                         $in_lp_mode = 1;
21637                     }
21638
21639                     #----------------------------------------
21640                     # Create indentation object if in lp-mode
21641                     #----------------------------------------
21642                     ++$max_lp_stack;
21643                     my $lp_object;
21644                     if ($in_lp_mode) {
21645
21646                         # A negative level implies not to store the item in the
21647                         # item_list
21648                         my $lp_item_index = 0;
21649                         if ( $level >= 0 ) {
21650                             $lp_item_index = ++$max_lp_object_list;
21651                         }
21652
21653                         my $K_begin_line = 0;
21654                         if (   $ii_begin_line >= 0
21655                             && $ii_begin_line <= $max_index_to_go )
21656                         {
21657                             $K_begin_line = $K_to_go[$ii_begin_line];
21658                         }
21659
21660                         # Minor Fix: when creating indentation at a side
21661                         # comment we don't know what the space to the actual
21662                         # next code token will be.  We will allow a space for
21663                         # sub correct_lp to move it in if necessary.
21664                         if (   $type eq '#'
21665                             && $max_index_to_go > 0
21666                             && $align_seqno )
21667                         {
21668                             $available_spaces += 1;
21669                         }
21670
21671                         $lp_object = Perl::Tidy::IndentationItem->new(
21672                             spaces           => $space_count,
21673                             level            => $level,
21674                             ci_level         => $ci_level,
21675                             available_spaces => $available_spaces,
21676                             lp_item_index    => $lp_item_index,
21677                             align_seqno      => $align_seqno,
21678                             stack_depth      => $max_lp_stack,
21679                             K_begin_line     => $K_begin_line,
21680                             standard_spaces  => $standard_spaces,
21681                         );
21682
21683                         DEBUG_LP && do {
21684                             my $tok_beg = $rLL->[$K_begin_line]->[_TOKEN_];
21685                             print STDERR <<EOM;
21686 DEBUG_LP: Created object at tok=$token type=$type for seqno $align_seqno level=$level ci=$ci_level spaces=$space_count avail=$available_spaces kbeg=$K_begin_line tokbeg=$tok_beg lp=$lp_position_predictor
21687 EOM
21688                         };
21689
21690                         if ( $level >= 0 ) {
21691                             $rlp_object_list->[$max_lp_object_list] =
21692                               $lp_object;
21693                         }
21694
21695                         ##if (   $last_nonblank_token =~ /^[\{\[\(]$/
21696                         if (   $is_opening_token{$last_nonblank_token}
21697                             && $last_nonblank_seqno )
21698                         {
21699                             $rlp_object_by_seqno->{$last_nonblank_seqno} =
21700                               $lp_object;
21701                         }
21702                     }
21703
21704                     #------------------------------------
21705                     # Store this indentation on the stack
21706                     #------------------------------------
21707                     $rLP->[$max_lp_stack]->[_lp_ci_level_] = $ci_level;
21708                     $rLP->[$max_lp_stack]->[_lp_level_]    = $level;
21709                     $rLP->[$max_lp_stack]->[_lp_object_]   = $lp_object;
21710                     $rLP->[$max_lp_stack]->[_lp_container_seqno_] =
21711                       $last_nonblank_seqno;
21712                     $rLP->[$max_lp_stack]->[_lp_space_count_] = $space_count;
21713
21714                     # If the opening paren is beyond the half-line length, then
21715                     # we will use the minimum (standard) indentation.  This will
21716                     # help avoid problems associated with running out of space
21717                     # near the end of a line.  As a result, in deeply nested
21718                     # lists, there will be some indentations which are limited
21719                     # to this minimum standard indentation. But the most deeply
21720                     # nested container will still probably be able to shift its
21721                     # parameters to the right for proper alignment, so in most
21722                     # cases this will not be noticeable.
21723                     if ( $available_spaces > 0 && $lp_object ) {
21724                         my $halfway =
21725                           $maximum_line_length_at_level[$level] -
21726                           $rOpts_maximum_line_length / 2;
21727                         $lp_object->tentatively_decrease_available_spaces(
21728                             $available_spaces)
21729                           if ( $space_count > $halfway );
21730                     }
21731                 }
21732             } ## end increasing depth
21733
21734             #------------------
21735             # Handle all tokens
21736             #------------------
21737             if ( $type ne 'b' ) {
21738
21739                 # Count commas and look for non-list characters.  Once we see a
21740                 # non-list character, we give up and don't look for any more
21741                 # commas.
21742                 if ( $type eq '=>' ) {
21743                     $lp_arrow_count{$total_depth}++;
21744
21745                     # remember '=>' like '=' for estimating breaks (but see
21746                     # above note for b1035)
21747                     $last_lp_equals{$total_depth} = $ii;
21748                 }
21749
21750                 elsif ( $type eq ',' ) {
21751                     $lp_comma_count{$total_depth}++;
21752                 }
21753
21754                 elsif ( $is_assignment{$type} ) {
21755                     $last_lp_equals{$total_depth} = $ii;
21756                 }
21757
21758                 # this token might start a new line if ..
21759                 if (
21760
21761                     # this is the first nonblank token of the line
21762                     $ii == 1 && $types_to_go[0] eq 'b'
21763
21764                     # or previous character was one of these:
21765                     #  /^([\:\?\,f])$/
21766                     || $hash_test2{$last_nonblank_type}
21767
21768                     # or previous character was opening and this is not closing
21769                     || ( $last_nonblank_type eq '{' && $type ne '}' )
21770                     || ( $last_nonblank_type eq '(' and $type ne ')' )
21771
21772                     # or this token is one of these:
21773                     #  /^([\.]|\|\||\&\&)$/
21774                     || $hash_test3{$type}
21775
21776                     # or this is a closing structure
21777                     || (   $last_nonblank_type eq '}'
21778                         && $last_nonblank_token eq $last_nonblank_type )
21779
21780                     # or previous token was keyword 'return'
21781                     || (
21782                         $last_nonblank_type eq 'k'
21783                         && (   $last_nonblank_token eq 'return'
21784                             && $type ne '{' )
21785                     )
21786
21787                     # or starting a new line at certain keywords is fine
21788                     || (   $type eq 'k'
21789                         && $is_if_unless_and_or_last_next_redo_return{$token} )
21790
21791                     # or this is after an assignment after a closing structure
21792                     || (
21793                         $is_assignment{$last_nonblank_type}
21794                         && (
21795                             # /^[\}\)\]]$/
21796                             $hash_test1{$last_last_nonblank_type}
21797
21798                             # and it is significantly to the right
21799                             || $lp_position_predictor > (
21800                                 $maximum_line_length_at_level[$level] -
21801                                   $rOpts_maximum_line_length / 2
21802                             )
21803                         )
21804                     )
21805                   )
21806                 {
21807                     check_for_long_gnu_style_lines( $ii, $rlp_object_list );
21808                     $ii_begin_line = $ii;
21809
21810                     # back up 1 token if we want to break before that type
21811                     # otherwise, we may strand tokens like '?' or ':' on a line
21812                     if ( $ii_begin_line > 0 ) {
21813                         if ( $last_nonblank_type eq 'k' ) {
21814
21815                             if ( $want_break_before{$last_nonblank_token} ) {
21816                                 $ii_begin_line--;
21817                             }
21818                         }
21819                         elsif ( $want_break_before{$last_nonblank_type} ) {
21820                             $ii_begin_line--;
21821                         }
21822                     }
21823                 } ## end if ( $ii == 1 && $types_to_go...)
21824
21825                 $K_last_nonblank = $KK;
21826
21827                 $last_last_nonblank_type = $last_nonblank_type;
21828                 $last_nonblank_type      = $type;
21829                 $last_nonblank_token     = $token;
21830
21831             } ## end if ( $type ne 'b' )
21832
21833             # remember the predicted position of this token on the output line
21834             if ( $ii > $ii_begin_line ) {
21835
21836                 ## NOTE: this is a critical loop - the following call has been
21837                 ## expanded for about 2x speedup:
21838                 ## $lp_position_predictor =
21839                 ##    total_line_length( $ii_begin_line, $ii );
21840
21841                 my $indentation = $leading_spaces_to_go[$ii_begin_line];
21842                 if ( ref($indentation) ) {
21843                     $indentation = $indentation->get_spaces();
21844                 }
21845                 $lp_position_predictor =
21846                   $indentation +
21847                   $summed_lengths_to_go[ $ii + 1 ] -
21848                   $summed_lengths_to_go[$ii_begin_line];
21849             }
21850             else {
21851                 $lp_position_predictor =
21852                   $space_count + $token_lengths_to_go[$ii];
21853             }
21854
21855             # Store the indentation object for this token.
21856             # This allows us to manipulate the leading whitespace
21857             # (in case we have to reduce indentation to fit a line) without
21858             # having to change any token values.
21859
21860             #---------------------------------------------------------------
21861             # replace leading whitespace with indentation objects where used
21862             #---------------------------------------------------------------
21863             if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
21864                 my $lp_object = $rLP->[$max_lp_stack]->[_lp_object_];
21865                 $leading_spaces_to_go[$ii] = $lp_object;
21866                 if (   $max_lp_stack > 0
21867                     && $ci_level
21868                     && $rLP->[ $max_lp_stack - 1 ]->[_lp_object_] )
21869                 {
21870                     $reduced_spaces_to_go[$ii] =
21871                       $rLP->[ $max_lp_stack - 1 ]->[_lp_object_];
21872                 }
21873                 else {
21874                     $reduced_spaces_to_go[$ii] = $lp_object;
21875                 }
21876             }
21877         } ## end loop over all tokens in this batch
21878
21879         undo_incomplete_lp_indentation($rlp_object_list)
21880           if ( !$rOpts_extended_line_up_parentheses );
21881
21882         return;
21883     } ## end sub set_lp_indentation
21884
21885     sub check_for_long_gnu_style_lines {
21886
21887         # look at the current estimated maximum line length, and
21888         # remove some whitespace if it exceeds the desired maximum
21889         my ( $mx_index_to_go, $rlp_object_list ) = @_;
21890
21891         my $max_lp_object_list = @{$rlp_object_list} - 1;
21892
21893         # nothing can be done if no stack items defined for this line
21894         return if ( $max_lp_object_list < 0 );
21895
21896         # see if we have exceeded the maximum desired line length
21897         # keep 2 extra free because they are needed in some cases
21898         # (result of trial-and-error testing)
21899         my $spaces_needed =
21900           $lp_position_predictor -
21901           $maximum_line_length_at_level[ $levels_to_go[$mx_index_to_go] ] + 2;
21902
21903         return if ( $spaces_needed <= 0 );
21904
21905         # We are over the limit, so try to remove a requested number of
21906         # spaces from leading whitespace.  We are only allowed to remove
21907         # from whitespace items created on this batch, since others have
21908         # already been used and cannot be undone.
21909         my @candidates = ();
21910
21911         # loop over all whitespace items created for the current batch
21912         foreach my $i ( 0 .. $max_lp_object_list ) {
21913             my $item = $rlp_object_list->[$i];
21914
21915             # item must still be open to be a candidate (otherwise it
21916             # cannot influence the current token)
21917             next if ( $item->get_closed() >= 0 );
21918
21919             my $available_spaces = $item->get_available_spaces();
21920
21921             if ( $available_spaces > 0 ) {
21922                 push( @candidates, [ $i, $available_spaces ] );
21923             }
21924         }
21925
21926         return unless (@candidates);
21927
21928         # sort by available whitespace so that we can remove whitespace
21929         # from the maximum available first.
21930         @candidates =
21931           sort { $b->[1] <=> $a->[1] || $a->[0] <=> $b->[0] } @candidates;
21932
21933         # keep removing whitespace until we are done or have no more
21934         foreach my $candidate (@candidates) {
21935             my ( $i, $available_spaces ) = @{$candidate};
21936             my $deleted_spaces =
21937               ( $available_spaces > $spaces_needed )
21938               ? $spaces_needed
21939               : $available_spaces;
21940
21941             # remove the incremental space from this item
21942             $rlp_object_list->[$i]->decrease_available_spaces($deleted_spaces);
21943
21944             my $i_debug = $i;
21945
21946             # update the leading whitespace of this item and all items
21947             # that came after it
21948             $i -= 1;
21949             while ( ++$i <= $max_lp_object_list ) {
21950
21951                 my $old_spaces = $rlp_object_list->[$i]->get_spaces();
21952                 if ( $old_spaces >= $deleted_spaces ) {
21953                     $rlp_object_list->[$i]->decrease_SPACES($deleted_spaces);
21954                 }
21955
21956                 # shouldn't happen except for code bug:
21957                 else {
21958                     # non-fatal, keep going except in DEVEL_MODE
21959                     if (DEVEL_MODE) {
21960                         my $level = $rlp_object_list->[$i_debug]->get_level();
21961                         my $ci_level =
21962                           $rlp_object_list->[$i_debug]->get_ci_level();
21963                         my $old_level = $rlp_object_list->[$i]->get_level();
21964                         my $old_ci_level =
21965                           $rlp_object_list->[$i]->get_ci_level();
21966                         Fault(<<EOM);
21967 program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level  deleted: level=$old_level ci=$ci_level
21968 EOM
21969                     }
21970                 }
21971             }
21972             $lp_position_predictor -= $deleted_spaces;
21973             $spaces_needed         -= $deleted_spaces;
21974             last unless ( $spaces_needed > 0 );
21975         }
21976         return;
21977     } ## end sub check_for_long_gnu_style_lines
21978
21979     sub undo_incomplete_lp_indentation {
21980
21981         #------------------------------------------------------------------
21982         # Undo indentation for all incomplete -lp indentation levels of the
21983         # current batch unless -xlp is set.
21984         #------------------------------------------------------------------
21985
21986         # This routine is called once after each output stream batch is
21987         # finished to undo indentation for all incomplete -lp indentation
21988         # levels.  If this routine is called then comments and blank lines will
21989         # disrupt this indentation style.  In older versions of perltidy this
21990         # was always done because it could cause problems otherwise, but recent
21991         # improvements allow fairly good results to be obtained by skipping
21992         # this step with the -xlp flag.
21993         my ($rlp_object_list) = @_;
21994
21995         my $max_lp_object_list = @{$rlp_object_list} - 1;
21996
21997         # nothing to do if no stack items defined for this line
21998         return if ( $max_lp_object_list < 0 );
21999
22000         # loop over all whitespace items created for the current batch
22001         foreach my $i ( 0 .. $max_lp_object_list ) {
22002             my $item = $rlp_object_list->[$i];
22003
22004             # only look for open items
22005             next if ( $item->get_closed() >= 0 );
22006
22007             # Tentatively remove all of the available space
22008             # (The vertical aligner will try to get it back later)
22009             my $available_spaces = $item->get_available_spaces();
22010             if ( $available_spaces > 0 ) {
22011
22012                 # delete incremental space for this item
22013                 $rlp_object_list->[$i]
22014                   ->tentatively_decrease_available_spaces($available_spaces);
22015
22016                 # Reduce the total indentation space of any nodes that follow
22017                 # Note that any such nodes must necessarily be dependents
22018                 # of this node.
22019                 foreach ( $i + 1 .. $max_lp_object_list ) {
22020                     $rlp_object_list->[$_]->decrease_SPACES($available_spaces);
22021                 }
22022             }
22023         }
22024         return;
22025     } ## end sub undo_incomplete_lp_indentation
22026 } ## end closure set_lp_indentation
22027
22028 #----------------------------------------------------------------------
22029 # sub to set a requested break before an opening container in -lp mode.
22030 #----------------------------------------------------------------------
22031 sub set_forced_lp_break {
22032
22033     my ( $self, $i_begin_line, $i_opening ) = @_;
22034
22035     # Given:
22036     #   $i_begin_line = index of break in the _to_go arrays
22037     #   $i_opening = index of the opening container
22038
22039     # Set any requested break at a token before this opening container
22040     # token. This is often an '=' or '=>' but can also be things like
22041     # '.', ',', 'return'.  It was defined by sub set_lp_indentation.
22042
22043     # Important:
22044     #   For intact containers, call this at the closing token.
22045     #   For broken containers, call this at the opening token.
22046     # This will avoid needless breaks when it turns out that the
22047     # container does not actually get broken.  This isn't known until
22048     # the closing container for intact blocks.
22049
22050     return
22051       if ( $i_begin_line < 0
22052         || $i_begin_line > $max_index_to_go );
22053
22054     # Handle request to put a break break immediately before this token.
22055     # We may not want to do that since we are also breaking after it.
22056     if ( $i_begin_line == $i_opening ) {
22057
22058         # The following rules should be reviewed.  We may want to always
22059         # allow the break.  If we do not do the break, the indentation
22060         # may be off.
22061
22062         # RULE: don't break before it unless it is welded to a qw.
22063         # This works well, but we may want to relax this to allow
22064         # breaks in additional cases.
22065         return
22066           if ( !$self->[_rK_weld_right_]->{ $K_to_go[$i_opening] } );
22067         return unless ( $types_to_go[$max_index_to_go] eq 'q' );
22068     }
22069
22070     # Only break for breakpoints at the same
22071     # indentation level as the opening paren
22072     my $test1 = $nesting_depth_to_go[$i_opening];
22073     my $test2 = $nesting_depth_to_go[$i_begin_line];
22074     return if ( $test2 != $test1 );
22075
22076     # Back up at a blank (fixes case b932)
22077     my $ibr = $i_begin_line - 1;
22078     if (   $ibr > 0
22079         && $types_to_go[$ibr] eq 'b' )
22080     {
22081         $ibr--;
22082     }
22083     if ( $ibr >= 0 ) {
22084         my $i_nonblank = $self->set_forced_breakpoint($ibr);
22085
22086         # Crude patch to prevent sub recombine_breakpoints from undoing
22087         # this break, especially after an '='.  It will leave old
22088         # breakpoints alone. See c098/x045 for some examples.
22089         if ( defined($i_nonblank) ) {
22090             $old_breakpoint_to_go[$i_nonblank] = 1;
22091         }
22092     }
22093     return;
22094 } ## end sub set_forced_lp_break
22095
22096 sub reduce_lp_indentation {
22097
22098     # reduce the leading whitespace at token $i if possible by $spaces_needed
22099     # (a large value of $spaces_needed will remove all excess space)
22100     # NOTE: to be called from break_lists only for a sequence of tokens
22101     # contained between opening and closing parens/braces/brackets
22102
22103     my ( $self, $i, $spaces_wanted ) = @_;
22104     my $deleted_spaces = 0;
22105
22106     my $item             = $leading_spaces_to_go[$i];
22107     my $available_spaces = $item->get_available_spaces();
22108
22109     if (
22110         $available_spaces > 0
22111         && ( ( $spaces_wanted <= $available_spaces )
22112             || !$item->get_have_child() )
22113       )
22114     {
22115
22116         # we'll remove these spaces, but mark them as recoverable
22117         $deleted_spaces =
22118           $item->tentatively_decrease_available_spaces($spaces_wanted);
22119     }
22120
22121     return $deleted_spaces;
22122 } ## end sub reduce_lp_indentation
22123
22124 ###########################################################
22125 # CODE SECTION 13: Preparing batches for vertical alignment
22126 ###########################################################
22127
22128 sub check_convey_batch_input {
22129
22130     # Check for valid input to sub convey_batch_to_vertical_aligner.  An
22131     # error here would most likely be due to an error in the calling
22132     # routine 'sub grind_batch_of_CODE'.
22133     my ( $self, $ri_first, $ri_last ) = @_;
22134
22135     if ( !defined($ri_first) || !defined($ri_last) ) {
22136         Fault(<<EOM);
22137 Undefined line ranges ri_first and/r ri_last
22138 EOM
22139     }
22140
22141     my $nmax       = @{$ri_first} - 1;
22142     my $nmax_check = @{$ri_last} - 1;
22143     if ( $nmax < 0 || $nmax_check < 0 || $nmax != $nmax_check ) {
22144         Fault(<<EOM);
22145 Line range index error: nmax=$nmax but nmax_check=$nmax_check
22146 These should be equal and >=0
22147 EOM
22148     }
22149     my ( $ibeg, $iend );
22150     foreach my $n ( 0 .. $nmax ) {
22151         my $ibeg_m = $ibeg;
22152         my $iend_m = $iend;
22153         $ibeg = $ri_first->[$n];
22154         $iend = $ri_last->[$n];
22155         if ( $ibeg < 0 || $iend < $ibeg || $iend > $max_index_to_go ) {
22156             Fault(<<EOM);
22157 Bad line range at line index $n of $nmax: ibeg=$ibeg, iend=$iend
22158 These should have iend >= ibeg and be in the range (0..$max_index_to_go)
22159 EOM
22160         }
22161         next if ( $n == 0 );
22162         if ( $ibeg <= $iend_m ) {
22163             Fault(<<EOM);
22164 Line ranges overlap: iend=$iend_m at line $n-1 but ibeg=$ibeg for line $n
22165 EOM
22166         }
22167     }
22168     return;
22169 } ## end sub check_convey_batch_input
22170
22171 sub convey_batch_to_vertical_aligner {
22172
22173     my ($self) = @_;
22174
22175     # This routine receives a batch of code for which the final line breaks
22176     # have been defined. Here we prepare the lines for passing to the vertical
22177     # aligner.  We do the following tasks:
22178     # - mark certain vertical alignment tokens, such as '=', in each line
22179     # - make minor indentation adjustments
22180     # - do logical padding: insert extra blank spaces to help display certain
22181     #   logical constructions
22182
22183     my $this_batch = $self->[_this_batch_];
22184     my $ri_first   = $this_batch->[_ri_first_];
22185     my $ri_last    = $this_batch->[_ri_last_];
22186
22187     $self->check_convey_batch_input( $ri_first, $ri_last ) if (DEVEL_MODE);
22188
22189     my $n_last_line = @{$ri_first} - 1;
22190
22191     my $do_not_pad               = $this_batch->[_do_not_pad_];
22192     my $peak_batch_size          = $this_batch->[_peak_batch_size_];
22193     my $starting_in_quote        = $this_batch->[_starting_in_quote_];
22194     my $ending_in_quote          = $this_batch->[_ending_in_quote_];
22195     my $is_static_block_comment  = $this_batch->[_is_static_block_comment_];
22196     my $rix_seqno_controlling_ci = $this_batch->[_rix_seqno_controlling_ci_];
22197     my $batch_CODE_type          = $this_batch->[_batch_CODE_type_];
22198
22199     my $rLL                  = $self->[_rLL_];
22200     my $Klimit               = $self->[_Klimit_];
22201     my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
22202     my $ris_list_by_seqno    = $self->[_ris_list_by_seqno_];
22203
22204     my $ibeg_next = $ri_first->[0];
22205     my $iend_next = $ri_last->[0];
22206
22207     my $type_beg_next  = $types_to_go[$ibeg_next];
22208     my $type_end_next  = $types_to_go[$iend_next];
22209     my $token_beg_next = $tokens_to_go[$ibeg_next];
22210
22211     my $is_block_comment = $max_index_to_go == 0 && $types_to_go[0] eq '#';
22212
22213     my $rindentation_list = [0];    # ref to indentations for each line
22214     my ( $cscw_block_comment, $closing_side_comment );
22215     if ($rOpts_closing_side_comments) {
22216         ( $closing_side_comment, $cscw_block_comment ) =
22217           $self->add_closing_side_comment( $ri_first, $ri_last );
22218     }
22219
22220     # flush before a long if statement to avoid unwanted alignment
22221     if (   $n_last_line > 0
22222         && $type_beg_next eq 'k'
22223         && $is_if_unless{$token_beg_next} )
22224     {
22225         $self->flush_vertical_aligner();
22226     }
22227
22228     $self->undo_ci( $ri_first, $ri_last, $rix_seqno_controlling_ci )
22229       if ( $n_last_line > 0 || $rOpts_extended_continuation_indentation );
22230
22231     $self->set_logical_padding( $ri_first, $ri_last, $peak_batch_size,
22232         $starting_in_quote )
22233       if ( $n_last_line > 0 && $rOpts_logical_padding );
22234
22235     if (DEVEL_MODE) { $self->check_batch_summed_lengths() }
22236
22237     # ----------------------------------------------------------
22238     # define the vertical alignments for all lines of this batch
22239     # ----------------------------------------------------------
22240     my $rline_alignments =
22241       $self->make_vertical_alignments( $ri_first, $ri_last );
22242
22243     # ----------------------------------------------
22244     # loop to send each line to the vertical aligner
22245     # ----------------------------------------------
22246     my ( $type_beg, $type_end, $token_beg );
22247
22248     for my $n ( 0 .. $n_last_line ) {
22249
22250         # ----------------------------------------------------------------
22251         # This hash will hold the args for vertical alignment of this line
22252         # We will populate it as we go.
22253         # ----------------------------------------------------------------
22254         my $rvao_args = {};
22255
22256         my $type_beg_last = $type_beg;
22257         my $type_end_last = $type_end;
22258
22259         my $ibeg = $ibeg_next;
22260         my $iend = $iend_next;
22261         my $Kbeg = $K_to_go[$ibeg];
22262         my $Kend = $K_to_go[$iend];
22263
22264         $type_beg  = $type_beg_next;
22265         $type_end  = $type_end_next;
22266         $token_beg = $token_beg_next;
22267
22268         # ---------------------------------------------------
22269         # Define the check value 'Kend' to send for this line
22270         # ---------------------------------------------------
22271         # The 'Kend' value is an integer for checking that lines come out of
22272         # the far end of the pipeline in the right order.  It increases
22273         # linearly along the token stream.  But we only send ending K values of
22274         # non-comments down the pipeline.  This is equivalent to checking that
22275         # the last CODE_type is blank or equal to 'VER'. See also sub
22276         # resync_lines_and_tokens for related coding.  Note that
22277         # '$batch_CODE_type' is the code type of the line to which the ending
22278         # token belongs.
22279         my $Kend_code =
22280           $batch_CODE_type && $batch_CODE_type ne 'VER' ? undef : $Kend;
22281
22282         #  $ljump is a level jump needed by 'sub final_indentation_adjustment'
22283         my $ljump = 0;
22284
22285         # Get some vars on line [n+1], if any:
22286         if ( $n < $n_last_line ) {
22287             $ibeg_next = $ri_first->[ $n + 1 ];
22288             $iend_next = $ri_last->[ $n + 1 ];
22289
22290             $type_beg_next  = $types_to_go[$ibeg_next];
22291             $type_end_next  = $types_to_go[$iend_next];
22292             $token_beg_next = $tokens_to_go[$ibeg_next];
22293
22294             my $Kbeg_next = $K_to_go[$ibeg_next];
22295             $ljump = $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
22296         }
22297         elsif ( !$is_block_comment && $Kend < $Klimit ) {
22298
22299             # Patch for git #51, a bare closing qw paren was not outdented
22300             # if the flag '-nodelete-old-newlines is set
22301             # Note that we are just looking ahead for the next nonblank
22302             # character. We could scan past an arbitrary number of block
22303             # comments or hanging side comments by calling K_next_code, but it
22304             # could add significant run time with very little to be gained.
22305             my $Kbeg_next = $Kend + 1;
22306             if (   $Kbeg_next < $Klimit
22307                 && $rLL->[$Kbeg_next]->[_TYPE_] eq 'b' )
22308             {
22309                 $Kbeg_next += 1;
22310             }
22311             $ljump =
22312               $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
22313         }
22314
22315         # ---------------------------------------------
22316         # get the vertical alignment info for this line
22317         # ---------------------------------------------
22318
22319         # The lines are broken into fields which can be spaced by the vertical
22320         # to achieve vertical alignment.  These fields are the actual text
22321         # which will be output, so from here on no more changes can be made to
22322         # the text.
22323         my $rline_alignment = $rline_alignments->[$n];
22324         my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
22325           @{$rline_alignment};
22326
22327         # Programming check: (shouldn't happen)
22328         # The number of tokens which separate the fields must always be
22329         # one less than the number of fields. If this is not true then
22330         # an error has been introduced in sub make_alignment_patterns.
22331         if (DEVEL_MODE) {
22332             if ( @{$rfields} && ( @{$rtokens} != ( @{$rfields} - 1 ) ) ) {
22333                 my $nt  = @{$rtokens};
22334                 my $nf  = @{$rfields};
22335                 my $msg = <<EOM;
22336 Program bug in Perl::Tidy::Formatter, probably in sub 'make_alignment_patterns':
22337 The number of tokens = $nt should be one less than number of fields: $nf
22338 EOM
22339                 Fault($msg);
22340             }
22341         }
22342
22343         # --------------------------------------
22344         # get the final indentation of this line
22345         # --------------------------------------
22346         my ( $indentation, $lev, $level_end, $terminal_type,
22347             $terminal_block_type, $is_semicolon_terminated, $is_outdented_line )
22348           = $self->final_indentation_adjustment( $ibeg, $iend, $rfields,
22349             $rpatterns,         $ri_first, $ri_last,
22350             $rindentation_list, $ljump,    $starting_in_quote,
22351             $is_static_block_comment, );
22352
22353         # --------------------------------
22354         # define flag 'outdent_long_lines'
22355         # --------------------------------
22356         if (
22357             # we will allow outdenting of long lines..
22358             # which are long quotes, if allowed
22359             ( $type_beg eq 'Q' && $rOpts_outdent_long_quotes )
22360
22361             # which are long block comments, if allowed
22362             || (
22363                    $type_beg eq '#'
22364                 && $rOpts_outdent_long_comments
22365
22366                 # but not if this is a static block comment
22367                 && !$is_static_block_comment
22368             )
22369           )
22370         {
22371             $rvao_args->{outdent_long_lines} = 1;
22372
22373             # convert -lp indentation objects to spaces to allow outdenting
22374             if ( ref($indentation) ) {
22375                 $indentation = $indentation->get_spaces();
22376             }
22377         }
22378
22379         # --------------------------------------------------
22380         # define flags 'break_alignment_before' and '_after'
22381         # --------------------------------------------------
22382
22383         # These flags tell the vertical aligner to stop alignment before or
22384         # after this line.
22385         if ($is_outdented_line) {
22386             $rvao_args->{break_alignment_before} = 1;
22387             $rvao_args->{break_alignment_after}  = 1;
22388         }
22389         elsif ($do_not_pad) {
22390             $rvao_args->{break_alignment_before} = 1;
22391         }
22392
22393         # flush at an 'if' which follows a line with (1) terminal semicolon
22394         # or (2) terminal block_type which is not an 'if'.  This prevents
22395         # unwanted alignment between the lines.
22396         elsif ( $type_beg eq 'k' && $token_beg eq 'if' ) {
22397             my $type_m = 'b';
22398             my $block_type_m;
22399
22400             if ( $Kbeg > 0 ) {
22401                 my $Km = $Kbeg - 1;
22402                 $type_m = $rLL->[$Km]->[_TYPE_];
22403                 if ( $type_m eq 'b' && $Km > 0 ) {
22404                     $Km -= 1;
22405                     $type_m = $rLL->[$Km]->[_TYPE_];
22406                 }
22407                 if ( $type_m eq '#' && $Km > 0 ) {
22408                     $Km -= 1;
22409                     $type_m = $rLL->[$Km]->[_TYPE_];
22410                     if ( $type_m eq 'b' && $Km > 0 ) {
22411                         $Km -= 1;
22412                         $type_m = $rLL->[$Km]->[_TYPE_];
22413                     }
22414                 }
22415
22416                 my $seqno_m = $rLL->[$Km]->[_TYPE_SEQUENCE_];
22417                 if ($seqno_m) {
22418                     $block_type_m = $rblock_type_of_seqno->{$seqno_m};
22419                 }
22420             }
22421
22422             # break after anything that is not if-like
22423             if (
22424                 $type_m eq ';'
22425                 || (   $type_m eq '}'
22426                     && $block_type_m
22427                     && $block_type_m ne 'if'
22428                     && $block_type_m ne 'unless'
22429                     && $block_type_m ne 'elsif'
22430                     && $block_type_m ne 'else' )
22431               )
22432             {
22433                 $rvao_args->{break_alignment_before} = 1;
22434             }
22435         }
22436
22437         # ----------------------------------
22438         # define 'rvertical_tightness_flags'
22439         # ----------------------------------
22440         # These flags tell the vertical aligner if/when to combine consecutive
22441         # lines, based on the user input parameters.
22442         $rvao_args->{rvertical_tightness_flags} =
22443           $self->set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
22444             $ri_first, $ri_last, $ending_in_quote, $closing_side_comment )
22445           if ( !$is_block_comment );
22446
22447         # ----------------------------------
22448         # define 'is_terminal_ternary'  flag
22449         # ----------------------------------
22450
22451         # This flag is set at the final ':' of a ternary chain to request
22452         # vertical alignment of the final term.  Here is a slightly complex
22453         # example:
22454         #
22455         # $self->{_text} = (
22456         #    !$section        ? ''
22457         #   : $type eq 'item' ? "the $section entry"
22458         #   :                   "the section on $section"
22459         # )
22460         # . (
22461         #   $page
22462         #   ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
22463         #   : ' elsewhere in this document'
22464         # );
22465         #
22466         if ( $type_beg eq ':' || $n > 0 && $type_end_last eq ':' ) {
22467
22468             my $is_terminal_ternary = 0;
22469             my $last_leading_type   = $n > 0 ? $type_beg_last : ':';
22470             if (   $terminal_type ne ';'
22471                 && $n_last_line > $n
22472                 && $level_end == $lev )
22473             {
22474                 my $Kbeg_next = $K_to_go[$ibeg_next];
22475                 $level_end     = $rLL->[$Kbeg_next]->[_LEVEL_];
22476                 $terminal_type = $rLL->[$Kbeg_next]->[_TYPE_];
22477             }
22478             if (
22479                 $last_leading_type eq ':'
22480                 && (   ( $terminal_type eq ';' && $level_end <= $lev )
22481                     || ( $terminal_type ne ':' && $level_end < $lev ) )
22482               )
22483             {
22484
22485                 # the terminal term must not contain any ternary terms, as in
22486                 # my $ECHO = (
22487                 #       $Is_MSWin32 ? ".\\echo$$"
22488                 #     : $Is_MacOS   ? ":echo$$"
22489                 #     : ( $Is_NetWare ? "echo$$" : "./echo$$" )
22490                 # );
22491                 $is_terminal_ternary = 1;
22492
22493                 my $KP = $rLL->[$Kbeg]->[_KNEXT_SEQ_ITEM_];
22494                 while ( defined($KP) && $KP <= $Kend ) {
22495                     my $type_KP = $rLL->[$KP]->[_TYPE_];
22496                     if ( $type_KP eq '?' || $type_KP eq ':' ) {
22497                         $is_terminal_ternary = 0;
22498                         last;
22499                     }
22500                     $KP = $rLL->[$KP]->[_KNEXT_SEQ_ITEM_];
22501                 }
22502             }
22503             $rvao_args->{is_terminal_ternary} = $is_terminal_ternary;
22504         }
22505
22506         # -------------------------------------------------
22507         # add any new closing side comment to the last line
22508         # -------------------------------------------------
22509         if ( $closing_side_comment && $n == $n_last_line && @{$rfields} ) {
22510
22511             $rfields->[-1] .= " $closing_side_comment";
22512
22513             # NOTE: Patch for csc. We can just use 1 for the length of the csc
22514             # because its length should not be a limiting factor from here on.
22515             $rfield_lengths->[-1] += 2;
22516
22517             # repack
22518             $rline_alignment =
22519               [ $rtokens, $rfields, $rpatterns, $rfield_lengths ];
22520         }
22521
22522         # ------------------------
22523         # define flag 'list_seqno'
22524         # ------------------------
22525
22526         # This flag indicates if this line is contained in a multi-line list
22527         if ( !$is_block_comment ) {
22528             my $parent_seqno = $parent_seqno_to_go[$ibeg];
22529             $rvao_args->{list_seqno} = $ris_list_by_seqno->{$parent_seqno};
22530         }
22531
22532         # The alignment tokens have been marked with nesting_depths, so we need
22533         # to pass nesting depths to the vertical aligner. They remain invariant
22534         # under all formatting operations.  Previously, level values were sent
22535         # to the aligner.  But they can be altered in welding and other
22536         # operations, and this can lead to alignment errors.
22537         my $nesting_depth_beg = $nesting_depth_to_go[$ibeg];
22538         my $nesting_depth_end = $nesting_depth_to_go[$iend];
22539
22540         # A quirk in the definition of nesting depths is that the closing token
22541         # has the same depth as internal tokens.  The vertical aligner is
22542         # programmed to expect them to have the lower depth, so we fix this.
22543         if ( $is_closing_type{ $types_to_go[$ibeg] } ) { $nesting_depth_beg-- }
22544         if ( $is_closing_type{ $types_to_go[$iend] } ) { $nesting_depth_end-- }
22545
22546         # Adjust nesting depths to keep -lp indentation for qw lists.  This is
22547         # required because qw lists contained in brackets do not get nesting
22548         # depths, but the vertical aligner is watching nesting depth changes to
22549         # decide if a -lp block is intact.  Without this patch, qw lists
22550         # enclosed in angle brackets will not get the correct -lp indentation.
22551
22552         # Looking for line with isolated qw ...
22553         if (   $rOpts_line_up_parentheses
22554             && $type_beg eq 'q'
22555             && $ibeg == $iend )
22556         {
22557
22558             # ... which is part of a multiline qw
22559             my $Km = $self->K_previous_nonblank($Kbeg);
22560             my $Kp = $self->K_next_nonblank($Kbeg);
22561             if (   defined($Km) && $rLL->[$Km]->[_TYPE_] eq 'q'
22562                 || defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' )
22563             {
22564                 $nesting_depth_beg++;
22565                 $nesting_depth_end++;
22566             }
22567         }
22568
22569         # ---------------------------------
22570         # define flag 'forget_side_comment'
22571         # ---------------------------------
22572
22573         # This flag tells the vertical aligner to reset the side comment
22574         # location if we are entering a new block from level 0.  This is
22575         # intended to keep side comments from drifting too far to the right.
22576         if (   $terminal_block_type
22577             && $nesting_depth_end > $nesting_depth_beg )
22578         {
22579             my $level_adj        = $lev;
22580             my $radjusted_levels = $self->[_radjusted_levels_];
22581             if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} )
22582             {
22583                 $level_adj = $radjusted_levels->[$Kbeg];
22584                 if ( $level_adj < 0 ) { $level_adj = 0 }
22585             }
22586             if ( $level_adj == 0 ) {
22587                 $rvao_args->{forget_side_comment} = 1;
22588             }
22589         }
22590
22591         # -----------------------------------
22592         # Store the remaining non-flag values
22593         # -----------------------------------
22594         $rvao_args->{Kend}            = $Kend_code;
22595         $rvao_args->{ci_level}        = $ci_levels_to_go[$ibeg];
22596         $rvao_args->{indentation}     = $indentation;
22597         $rvao_args->{level_end}       = $nesting_depth_end;
22598         $rvao_args->{level}           = $nesting_depth_beg;
22599         $rvao_args->{rline_alignment} = $rline_alignment;
22600         $rvao_args->{maximum_line_length} =
22601           $maximum_line_length_at_level[ $levels_to_go[$ibeg] ];
22602
22603         # --------------------------------------
22604         # send this line to the vertical aligner
22605         # --------------------------------------
22606         my $vao = $self->[_vertical_aligner_object_];
22607         $vao->valign_input($rvao_args);
22608
22609         $do_not_pad = 0;
22610
22611         # Set flag indicating if this line ends in an opening
22612         # token and is very short, so that a blank line is not
22613         # needed if the subsequent line is a comment.
22614         # Examples of what we are looking for:
22615         #   {
22616         #   && (
22617         #   BEGIN {
22618         #   default {
22619         #   sub {
22620         $self->[_last_output_short_opening_token_]
22621
22622           # line ends in opening token
22623           #              /^[\{\(\[L]$/
22624           = $is_opening_type{$type_end}
22625
22626           # and either
22627           && (
22628             # line has either single opening token
22629             $Kend == $Kbeg
22630
22631             # or is a single token followed by opening token.
22632             # Note that sub identifiers have blanks like 'sub doit'
22633             #                                 $token_beg !~ /\s+/
22634             || ( $Kend - $Kbeg <= 2 && index( $token_beg, SPACE ) < 0 )
22635           )
22636
22637           # and limit total to 10 character widths
22638           && token_sequence_length( $ibeg, $iend ) <= 10;
22639
22640     } ## end of loop to output each line
22641
22642     # remember indentation of lines containing opening containers for
22643     # later use by sub final_indentation_adjustment
22644     $self->save_opening_indentation( $ri_first, $ri_last, $rindentation_list )
22645       if ( !$is_block_comment );
22646
22647     # output any new -cscw block comment
22648     if ($cscw_block_comment) {
22649         $self->flush_vertical_aligner();
22650         my $file_writer_object = $self->[_file_writer_object_];
22651         $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
22652     }
22653     return;
22654 } ## end sub convey_batch_to_vertical_aligner
22655
22656 sub check_batch_summed_lengths {
22657
22658     my ( $self, $msg ) = @_;
22659     $msg = EMPTY_STRING unless defined($msg);
22660     my $rLL = $self->[_rLL_];
22661
22662     # Verify that the summed lengths are correct. We want to be sure that
22663     # errors have not been introduced by programming changes.  Summed lengths
22664     # are defined in sub $store_token.  Operations like padding and unmasking
22665     # semicolons can change token lengths, but those operations are expected to
22666     # update the summed lengths when they make changes.  So the summed lengths
22667     # should always be correct.
22668     foreach my $i ( 0 .. $max_index_to_go ) {
22669         my $len_by_sum =
22670           $summed_lengths_to_go[ $i + 1 ] - $summed_lengths_to_go[$i];
22671         my $len_tok_i = $token_lengths_to_go[$i];
22672         my $KK        = $K_to_go[$i];
22673         my $len_tok_K;
22674         if ( defined($KK) ) { $len_tok_K = $rLL->[$KK]->[_TOKEN_LENGTH_] }
22675         if ( $len_by_sum != $len_tok_i
22676             || defined($len_tok_K) && $len_by_sum != $len_tok_K )
22677         {
22678             my $lno = defined($KK) ? $rLL->[$KK]->[_LINE_INDEX_] + 1 : "undef";
22679             $KK = 'undef' unless defined($KK);
22680             my $tok  = $tokens_to_go[$i];
22681             my $type = $types_to_go[$i];
22682             Fault(<<EOM);
22683 Summed lengths are appear to be incorrect.  $msg
22684 lengths disagree: token length by sum=$len_by_sum but token_length_to_go[$i] = $len_tok_i and rLL->[$KK]->[_TOKEN_LENGTH_]=$len_tok_K
22685 near line $lno starting with '$tokens_to_go[0]..' at token i=$i K=$KK token_type='$type' token='$tok'
22686 EOM
22687         }
22688     }
22689     return;
22690 } ## end sub check_batch_summed_lengths
22691
22692 {    ## begin closure set_vertical_alignment_markers
22693     my %is_vertical_alignment_type;
22694     my %is_not_vertical_alignment_token;
22695     my %is_vertical_alignment_keyword;
22696     my %is_terminal_alignment_type;
22697     my %is_low_level_alignment_token;
22698
22699     BEGIN {
22700
22701         my @q;
22702
22703         # Replaced =~ and // in the list.  // had been removed in RT 119588
22704         @q = qw#
22705           = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
22706           { ? : => && || ~~ !~~ =~ !~ // <=> ->
22707           #;
22708         @is_vertical_alignment_type{@q} = (1) x scalar(@q);
22709
22710         # These 'tokens' are not aligned. We need this to remove [
22711         # from the above list because it has type ='{'
22712         @q = qw([);
22713         @is_not_vertical_alignment_token{@q} = (1) x scalar(@q);
22714
22715         # these are the only types aligned at a line end
22716         @q = qw(&& || =>);
22717         @is_terminal_alignment_type{@q} = (1) x scalar(@q);
22718
22719         # these tokens only align at line level
22720         @q = ( '{', '(' );
22721         @is_low_level_alignment_token{@q} = (1) x scalar(@q);
22722
22723         # eq and ne were removed from this list to improve alignment chances
22724         @q = qw(if unless and or err for foreach while until);
22725         @is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
22726     }
22727
22728     sub set_vertical_alignment_markers {
22729
22730         # This routine takes the first step toward vertical alignment of the
22731         # lines of output text.  It looks for certain tokens which can serve as
22732         # vertical alignment markers (such as an '=').
22733         #
22734         # Method: We look at each token $i in this output batch and set
22735         # $ralignment_type_to_go->[$i] equal to those tokens at which we would
22736         # accept vertical alignment.
22737
22738         my ( $self, $ri_first, $ri_last ) = @_;
22739
22740         my $ralignment_type_to_go;
22741         my $ralignment_counts       = [];
22742         my $ralignment_hash_by_line = [];
22743
22744         # NOTE: closing side comments can insert up to 2 additional tokens
22745         # beyond the original $max_index_to_go, so we need to check ri_last for
22746         # the last index.
22747         my $max_line = @{$ri_first} - 1;
22748         my $max_i    = $ri_last->[$max_line];
22749         if ( $max_i < $max_index_to_go ) { $max_i = $max_index_to_go }
22750
22751         # -----------------------------------------------------------------
22752         # Shortcut:
22753         #    - no alignments if there is only 1 token.
22754         #    - and nothing to do if we aren't allowed to change whitespace.
22755         # -----------------------------------------------------------------
22756         if ( $max_i <= 0 || !$rOpts_add_whitespace ) {
22757             return ( $ralignment_type_to_go, $ralignment_counts,
22758                 $ralignment_hash_by_line );
22759         }
22760
22761         my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
22762         my $ris_function_call_paren    = $self->[_ris_function_call_paren_];
22763         my $rLL                        = $self->[_rLL_];
22764
22765         # -------------------------------
22766         # First handle any side comment.
22767         # -------------------------------
22768         my $i_terminal = $max_i;
22769         if ( $types_to_go[$max_i] eq '#' ) {
22770
22771             # We know $max_i > 0 if we get here.
22772             $i_terminal -= 1;
22773             if ( $i_terminal > 0 && $types_to_go[$i_terminal] eq 'b' ) {
22774                 $i_terminal -= 1;
22775             }
22776
22777             my $token = $tokens_to_go[$max_i];
22778             my $KK    = $K_to_go[$max_i];
22779
22780             # Do not align various special side comments
22781             my $do_not_align = (
22782
22783                 # it is any specially marked side comment
22784                 ( defined($KK) && $rspecial_side_comment_type->{$KK} )
22785
22786                 # or it is a static side comment
22787                   || ( $rOpts->{'static-side-comments'}
22788                     && $token =~ /$static_side_comment_pattern/ )
22789
22790                   # or a closing side comment
22791                   || ( $types_to_go[$i_terminal] eq '}'
22792                     && $tokens_to_go[$i_terminal] eq '}'
22793                     && $token =~ /$closing_side_comment_prefix_pattern/ )
22794             );
22795
22796             # - For the specific combination -vc -nvsc, we put all side comments
22797             #   at fixed locations. Note that we will lose hanging side comment
22798             #   alignments. Otherwise, hsc's can move to strange locations.
22799             # - For -nvc -nvsc we make all side comments vertical alignments
22800             #   because the vertical aligner will check for -nvsc and be able
22801             #   to reduce the final padding to the side comments for long lines.
22802             #   and keep hanging side comments aligned.
22803             if (   !$do_not_align
22804                 && !$rOpts_valign_side_comments
22805                 && $rOpts_valign_code )
22806             {
22807
22808                 $do_not_align = 1;
22809                 my $ipad = $max_i - 1;
22810                 if ( $types_to_go[$ipad] eq 'b' ) {
22811                     my $pad_spaces =
22812                       $rOpts->{'minimum-space-to-comment'} -
22813                       $token_lengths_to_go[$ipad];
22814                     $self->pad_token( $ipad, $pad_spaces );
22815                 }
22816             }
22817
22818             if ( !$do_not_align ) {
22819                 $ralignment_type_to_go->[$max_i] = '#';
22820                 $ralignment_hash_by_line->[$max_line]->{$max_i} = '#';
22821                 $ralignment_counts->[$max_line]++;
22822             }
22823         }
22824
22825         # ----------------------------------------------
22826         # Nothing more to do on this line if -nvc is set
22827         # ----------------------------------------------
22828         if ( !$rOpts_valign_code ) {
22829             return ( $ralignment_type_to_go, $ralignment_counts,
22830                 $ralignment_hash_by_line );
22831         }
22832
22833         # -------------------------------------
22834         # Loop over each line of this batch ...
22835         # -------------------------------------
22836         my $last_vertical_alignment_BEFORE_index;
22837         my $vert_last_nonblank_type;
22838         my $vert_last_nonblank_token;
22839
22840         foreach my $line ( 0 .. $max_line ) {
22841
22842             my $ibeg = $ri_first->[$line];
22843             my $iend = $ri_last->[$line];
22844
22845             next if ( $iend <= $ibeg );
22846
22847             # back up before any side comment
22848             if ( $iend > $i_terminal ) { $iend = $i_terminal }
22849
22850             my $level_beg = $levels_to_go[$ibeg];
22851             my $token_beg = $tokens_to_go[$ibeg];
22852             my $type_beg  = $types_to_go[$ibeg];
22853             my $type_beg_special_char =
22854               ( $type_beg eq '.' || $type_beg eq ':' || $type_beg eq '?' );
22855
22856             $last_vertical_alignment_BEFORE_index = -1;
22857             $vert_last_nonblank_type              = $type_beg;
22858             $vert_last_nonblank_token             = $token_beg;
22859
22860             # ----------------------------------------------------------------
22861             # Initialization code merged from 'sub delete_needless_alignments'
22862             # ----------------------------------------------------------------
22863             my $i_good_paren  = -1;
22864             my $i_elsif_close = $ibeg - 1;
22865             my $i_elsif_open  = $iend + 1;
22866             my @imatch_list;
22867             if ( $type_beg eq 'k' ) {
22868
22869                 # Initialization for paren patch: mark a location of a paren we
22870                 # should keep, such as one following something like a leading
22871                 # 'if', 'elsif',
22872                 $i_good_paren = $ibeg + 1;
22873                 if ( $types_to_go[$i_good_paren] eq 'b' ) {
22874                     $i_good_paren++;
22875                 }
22876
22877                 # Initialization for 'elsif' patch: remember the paren range of
22878                 # an elsif, and do not make alignments within them because this
22879                 # can cause loss of padding and overall brace alignment in the
22880                 # vertical aligner.
22881                 if (   $token_beg eq 'elsif'
22882                     && $i_good_paren < $iend
22883                     && $tokens_to_go[$i_good_paren] eq '(' )
22884                 {
22885                     $i_elsif_open  = $i_good_paren;
22886                     $i_elsif_close = $mate_index_to_go[$i_good_paren];
22887                 }
22888             } ## end if ( $type_beg eq 'k' )
22889
22890             # --------------------------------------------
22891             # Loop over each token in this output line ...
22892             # --------------------------------------------
22893             foreach my $i ( $ibeg + 1 .. $iend ) {
22894
22895                 next if ( $types_to_go[$i] eq 'b' );
22896
22897                 my $type           = $types_to_go[$i];
22898                 my $token          = $tokens_to_go[$i];
22899                 my $alignment_type = EMPTY_STRING;
22900
22901                 # ----------------------------------------------
22902                 # Check for 'paren patch' : Remove excess parens
22903                 # ----------------------------------------------
22904
22905                 # Excess alignment of parens can prevent other good alignments.
22906                 # For example, note the parens in the first two rows of the
22907                 # following snippet.  They would normally get marked for
22908                 # alignment and aligned as follows:
22909
22910                 #    my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
22911                 #    my $h = $rows * $cell_h +    ( $rows + 1 ) * $border;
22912                 #    my $img = new Gimp::Image( $w, $h, RGB );
22913
22914                 # This causes unnecessary paren alignment and prevents the
22915                 # third equals from aligning. If we remove the unwanted
22916                 # alignments we get:
22917
22918                 #    my $w   = $columns * $cell_w + ( $columns + 1 ) * $border;
22919                 #    my $h   = $rows * $cell_h + ( $rows + 1 ) * $border;
22920                 #    my $img = new Gimp::Image( $w, $h, RGB );
22921
22922                 # A rule for doing this which works well is to remove alignment
22923                 # of parens whose containers do not contain other aligning
22924                 # tokens, with the exception that we always keep alignment of
22925                 # the first opening paren on a line (for things like 'if' and
22926                 # 'elsif' statements).
22927                 if ( $token eq ')' && @imatch_list ) {
22928
22929                     # undo the corresponding opening paren if:
22930                     # - it is at the top of the stack
22931                     # - and not the first overall opening paren
22932                     # - does not follow a leading keyword on this line
22933                     my $imate = $mate_index_to_go[$i];
22934                     if (   $imatch_list[-1] eq $imate
22935                         && ( $ibeg > 1 || @imatch_list > 1 )
22936                         && $imate > $i_good_paren )
22937                     {
22938                         if ( $ralignment_type_to_go->[$imate] ) {
22939                             $ralignment_type_to_go->[$imate] = EMPTY_STRING;
22940                             $ralignment_counts->[$line]--;
22941                             delete $ralignment_hash_by_line->[$line]->{$imate};
22942                         }
22943                         pop @imatch_list;
22944                     }
22945                 }
22946
22947                 # do not align tokens at lower level than start of line
22948                 # except for side comments
22949                 if ( $levels_to_go[$i] < $level_beg ) {
22950                     next;
22951                 }
22952
22953                 #--------------------------------------------------------
22954                 # First see if we want to align BEFORE this token
22955                 #--------------------------------------------------------
22956
22957                 # The first possible token that we can align before
22958                 # is index 2 because: 1) it doesn't normally make sense to
22959                 # align before the first token and 2) the second
22960                 # token must be a blank if we are to align before
22961                 # the third
22962                 if ( $i < $ibeg + 2 ) { }
22963
22964                 # must follow a blank token
22965                 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
22966
22967                 # otherwise, do not align two in a row to create a
22968                 # blank field
22969                 elsif ( $last_vertical_alignment_BEFORE_index == $i - 2 ) { }
22970
22971                 # align before one of these keywords
22972                 # (within a line, since $i>1)
22973                 elsif ( $type eq 'k' ) {
22974
22975                     #  /^(if|unless|and|or|eq|ne)$/
22976                     if ( $is_vertical_alignment_keyword{$token} ) {
22977                         $alignment_type = $token;
22978                     }
22979                 }
22980
22981                 # align qw in a 'use' statement (issue git #93)
22982                 elsif ( $type eq 'q' ) {
22983                     if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] eq 'use' ) {
22984                         $alignment_type = $type;
22985                     }
22986                 }
22987
22988                 # align before one of these types..
22989                 elsif ( $is_vertical_alignment_type{$type}
22990                     && !$is_not_vertical_alignment_token{$token} )
22991                 {
22992                     $alignment_type = $token;
22993
22994                     # Do not align a terminal token.  Although it might
22995                     # occasionally look ok to do this, this has been found to be
22996                     # a good general rule.  The main problems are:
22997                     # (1) that the terminal token (such as an = or :) might get
22998                     # moved far to the right where it is hard to see because
22999                     # nothing follows it, and
23000                     # (2) doing so may prevent other good alignments.
23001                     # Current exceptions are && and || and =>
23002                     if ( $i == $iend ) {
23003                         $alignment_type = EMPTY_STRING
23004                           unless ( $is_terminal_alignment_type{$type} );
23005                     }
23006
23007                     # Do not align leading ': (' or '. ('.  This would prevent
23008                     # alignment in something like the following:
23009                     #   $extra_space .=
23010                     #       ( $input_line_number < 10 )  ? "  "
23011                     #     : ( $input_line_number < 100 ) ? " "
23012                     #     :                                "";
23013                     # or
23014                     #  $code =
23015                     #      ( $case_matters ? $accessor : " lc($accessor) " )
23016                     #    . ( $yesno        ? " eq "       : " ne " )
23017
23018                     # Also, do not align a ( following a leading ? so we can
23019                     # align something like this:
23020                     #   $converter{$_}->{ushortok} =
23021                     #     $PDL::IO::Pic::biggrays
23022                     #     ? ( m/GIF/          ? 0 : 1 )
23023                     #     : ( m/GIF|RAST|IFF/ ? 0 : 1 );
23024                     if (   $type_beg_special_char
23025                         && $i == $ibeg + 2
23026                         && $types_to_go[ $i - 1 ] eq 'b' )
23027                     {
23028                         $alignment_type = EMPTY_STRING;
23029                     }
23030
23031                     # Certain tokens only align at the same level as the
23032                     # initial line level
23033                     if (   $is_low_level_alignment_token{$token}
23034                         && $levels_to_go[$i] != $level_beg )
23035                     {
23036                         $alignment_type = EMPTY_STRING;
23037                     }
23038
23039                     # For a paren after keyword, only align something like this:
23040                     #    if    ( $a ) { &a }
23041                     #    elsif ( $b ) { &b }
23042                     if ( $token eq '(' ) {
23043
23044                         if ( $vert_last_nonblank_type eq 'k' ) {
23045                             $alignment_type = EMPTY_STRING
23046                               unless
23047                               $is_if_unless_elsif{$vert_last_nonblank_token};
23048                             ##unless $vert_last_nonblank_token =~ /^(if|unless|elsif)$/;
23049                         }
23050
23051                         # Do not align a spaced-function-paren if requested.
23052                         # Issue git #53, #73.
23053                         if ( !$rOpts_function_paren_vertical_alignment ) {
23054                             my $seqno = $type_sequence_to_go[$i];
23055                             if ( $ris_function_call_paren->{$seqno} ) {
23056                                 $alignment_type = EMPTY_STRING;
23057                             }
23058                         }
23059
23060                         # make () align with qw in a 'use' statement (git #93)
23061                         if (   $tokens_to_go[0] eq 'use'
23062                             && $types_to_go[0] eq 'k'
23063                             && $mate_index_to_go[$i] == $i + 1 )
23064                         {
23065                             $alignment_type = 'q';
23066                         }
23067                     }
23068
23069                     # be sure the alignment tokens are unique
23070                     # This didn't work well: reason not determined
23071                     # if ($token ne $type) {$alignment_type .= $type}
23072                 }
23073
23074                 # NOTE: This is deactivated because it causes the previous
23075                 # if/elsif alignment to fail
23076                 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
23077                 #{ $alignment_type = $type; }
23078
23079                 if ($alignment_type) {
23080                     $last_vertical_alignment_BEFORE_index = $i;
23081                 }
23082
23083                 #--------------------------------------------------------
23084                 # Next see if we want to align AFTER the previous nonblank
23085                 #--------------------------------------------------------
23086
23087                 # We want to line up ',' and interior ';' tokens, with the added
23088                 # space AFTER these tokens.  (Note: interior ';' is included
23089                 # because it may occur in short blocks).
23090                 elsif (
23091
23092                     # we haven't already set it
23093                     ##!$alignment_type
23094
23095                     # previous token IS one of these:
23096                     (
23097                            $vert_last_nonblank_type eq ','
23098                         || $vert_last_nonblank_type eq ';'
23099                     )
23100
23101                     # and its not the first token of the line
23102                     ## && $i > $ibeg
23103
23104                     # and it follows a blank
23105                     && $types_to_go[ $i - 1 ] eq 'b'
23106
23107                     # and it's NOT one of these
23108                     && !$is_closing_token{$type}
23109
23110                     # then go ahead and align
23111                   )
23112
23113                 {
23114                     $alignment_type = $vert_last_nonblank_type;
23115                 }
23116
23117                 #-----------------------
23118                 # Set the alignment type
23119                 #-----------------------
23120                 if ($alignment_type) {
23121
23122                     # but do not align the opening brace of an anonymous sub
23123                     if (   $token eq '{'
23124                         && $block_type_to_go[$i] =~ /$ASUB_PATTERN/ )
23125                     {
23126
23127                     }
23128
23129                     # and do not make alignments within 'elsif' parens
23130                     elsif ( $i > $i_elsif_open && $i < $i_elsif_close ) {
23131
23132                     }
23133
23134                     # and ignore any tokens which have leading padded spaces
23135                     # example: perl527/lop.t
23136                     elsif ( substr( $alignment_type, 0, 1 ) eq SPACE ) {
23137
23138                     }
23139
23140                     else {
23141                         $ralignment_type_to_go->[$i] = $alignment_type;
23142                         $ralignment_hash_by_line->[$line]->{$i} =
23143                           $alignment_type;
23144                         $ralignment_counts->[$line]++;
23145                         push @imatch_list, $i;
23146                     }
23147                 }
23148
23149                 $vert_last_nonblank_type  = $type;
23150                 $vert_last_nonblank_token = $token;
23151             }
23152         }
23153
23154         return ( $ralignment_type_to_go, $ralignment_counts,
23155             $ralignment_hash_by_line );
23156     } ## end sub set_vertical_alignment_markers
23157 } ## end closure set_vertical_alignment_markers
23158
23159 sub make_vertical_alignments {
23160     my ( $self, $ri_first, $ri_last ) = @_;
23161
23162     #----------------------------
23163     # Shortcut for a single token
23164     #----------------------------
23165     if ( $max_index_to_go == 0 ) {
23166         if ( @{$ri_first} == 1 && $ri_last->[0] == 0 ) {
23167             my $rtokens   = [];
23168             my $rfields   = [ $tokens_to_go[0] ];
23169             my $rpatterns = [ $types_to_go[0] ];
23170             my $rfield_lengths =
23171               [ $summed_lengths_to_go[1] - $summed_lengths_to_go[0] ];
23172             return [ [ $rtokens, $rfields, $rpatterns, $rfield_lengths ] ];
23173         }
23174
23175         # Strange line packing, not fatal but should not happen
23176         elsif (DEVEL_MODE) {
23177             my $max_line = @{$ri_first} - 1;
23178             my $ibeg     = $ri_first->[0];
23179             my $iend     = $ri_last->[0];
23180             my $tok_b    = $tokens_to_go[$ibeg];
23181             my $tok_e    = $tokens_to_go[$iend];
23182             my $type_b   = $types_to_go[$ibeg];
23183             my $type_e   = $types_to_go[$iend];
23184             Fault(
23185 "Strange..max_index=0 but nlines=$max_line ibeg=$ibeg tok=$tok_b type=$type_b iend=$iend tok=$tok_e type=$type_e; please check\n"
23186             );
23187         }
23188     }
23189
23190     #---------------------------------------------------------
23191     # Step 1: Define the alignment tokens for the entire batch
23192     #---------------------------------------------------------
23193     my ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line )
23194       = $self->set_vertical_alignment_markers( $ri_first, $ri_last );
23195
23196     #----------------------------------------------
23197     # Step 2: Break each line into alignment fields
23198     #----------------------------------------------
23199     my $rline_alignments = [];
23200     my $max_line         = @{$ri_first} - 1;
23201     foreach my $line ( 0 .. $max_line ) {
23202
23203         my $ibeg = $ri_first->[$line];
23204         my $iend = $ri_last->[$line];
23205
23206         my $rtok_fld_pat_len = $self->make_alignment_patterns(
23207             $ibeg, $iend, $ralignment_type_to_go,
23208             $ralignment_counts->[$line],
23209             $ralignment_hash_by_line->[$line]
23210         );
23211         push @{$rline_alignments}, $rtok_fld_pat_len;
23212     }
23213     return $rline_alignments;
23214 } ## end sub make_vertical_alignments
23215
23216 sub get_seqno {
23217
23218     # get opening and closing sequence numbers of a token for the vertical
23219     # aligner.  Assign qw quotes a value to allow qw opening and closing tokens
23220     # to be treated somewhat like opening and closing tokens for stacking
23221     # tokens by the vertical aligner.
23222     my ( $self, $ii, $ending_in_quote ) = @_;
23223
23224     my $rLL = $self->[_rLL_];
23225
23226     my $KK    = $K_to_go[$ii];
23227     my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
23228
23229     if ( $rLL->[$KK]->[_TYPE_] eq 'q' ) {
23230         my $SEQ_QW = -1;
23231         my $token  = $rLL->[$KK]->[_TOKEN_];
23232         if ( $ii > 0 ) {
23233             $seqno = $SEQ_QW if ( $token =~ /^qw\s*[\(\{\[]/ );
23234         }
23235         else {
23236             if ( !$ending_in_quote ) {
23237                 $seqno = $SEQ_QW if ( $token =~ /[\)\}\]]$/ );
23238             }
23239         }
23240     }
23241     return ($seqno);
23242 } ## end sub get_seqno
23243
23244 {
23245     my %undo_extended_ci;
23246
23247     sub initialize_undo_ci {
23248         %undo_extended_ci = ();
23249         return;
23250     }
23251
23252     sub undo_ci {
23253
23254         # Undo continuation indentation in certain sequences
23255         my ( $self, $ri_first, $ri_last, $rix_seqno_controlling_ci ) = @_;
23256         my ( $line_1, $line_2, $lev_last );
23257         my $this_line_is_semicolon_terminated;
23258         my $max_line = @{$ri_first} - 1;
23259
23260         my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
23261
23262         # Prepare a list of controlling indexes for each line if required.
23263         # This is used for efficient processing below.  Note: this is
23264         # critical for speed. In the initial implementation I just looped
23265         # through the @$rix_seqno_controlling_ci list below. Using NYT_prof, I
23266         # found that this routine was causing a huge run time in large lists.
23267         # On a very large list test case, this new coding dropped the run time
23268         # of this routine from 30 seconds to 169 milliseconds.
23269         my @i_controlling_ci;
23270         if ( @{$rix_seqno_controlling_ci} ) {
23271             my @tmp     = reverse @{$rix_seqno_controlling_ci};
23272             my $ix_next = pop @tmp;
23273             foreach my $line ( 0 .. $max_line ) {
23274                 my $iend = $ri_last->[$line];
23275                 while ( defined($ix_next) && $ix_next <= $iend ) {
23276                     push @{ $i_controlling_ci[$line] }, $ix_next;
23277                     $ix_next = pop @tmp;
23278                 }
23279             }
23280         }
23281
23282         # Loop over all lines of the batch ...
23283
23284         # Workaround originally created for problem c007, in which the
23285         # combination -lp -xci could produce a "Program bug" message in unusual
23286         # circumstances.
23287         my $skip_SECTION_1;
23288         if (   $rOpts_line_up_parentheses
23289             && $rOpts_extended_continuation_indentation )
23290         {
23291
23292             # Only set this flag if -lp is actually used here
23293             foreach my $line ( 0 .. $max_line ) {
23294                 my $ibeg = $ri_first->[$line];
23295                 if ( ref( $leading_spaces_to_go[$ibeg] ) ) {
23296                     $skip_SECTION_1 = 1;
23297                     last;
23298                 }
23299             }
23300         }
23301
23302         foreach my $line ( 0 .. $max_line ) {
23303
23304             my $ibeg = $ri_first->[$line];
23305             my $iend = $ri_last->[$line];
23306             my $lev  = $levels_to_go[$ibeg];
23307
23308             #-----------------------------------
23309             # SECTION 1: Undo needless common CI
23310             #-----------------------------------
23311
23312             # We are looking at leading tokens and looking for a sequence all
23313             # at the same level and all at a higher level than enclosing lines.
23314
23315             # For example, we can undo continuation indentation in sort/map/grep
23316             # chains
23317
23318             #    my $dat1 = pack( "n*",
23319             #        map { $_, $lookup->{$_} }
23320             #          sort { $a <=> $b }
23321             #          grep { $lookup->{$_} ne $default } keys %$lookup );
23322
23323             # to become
23324
23325             #    my $dat1 = pack( "n*",
23326             #        map { $_, $lookup->{$_} }
23327             #        sort { $a <=> $b }
23328             #        grep { $lookup->{$_} ne $default } keys %$lookup );
23329
23330             if ( $line > 0 && !$skip_SECTION_1 ) {
23331
23332                 # if we have started a chain..
23333                 if ($line_1) {
23334
23335                     # see if it continues..
23336                     if ( $lev == $lev_last ) {
23337                         if (   $types_to_go[$ibeg] eq 'k'
23338                             && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
23339                         {
23340
23341                             # chain continues...
23342                             # check for chain ending at end of a statement
23343                             if ( $line == $max_line ) {
23344
23345                                 # see of this line ends a statement
23346                                 $this_line_is_semicolon_terminated =
23347                                   $types_to_go[$iend] eq ';'
23348
23349                                   # with possible side comment
23350                                   || ( $types_to_go[$iend] eq '#'
23351                                     && $iend - $ibeg >= 2
23352                                     && $types_to_go[ $iend - 2 ] eq ';'
23353                                     && $types_to_go[ $iend - 1 ] eq 'b' );
23354                             }
23355                             $line_2 = $line
23356                               if ($this_line_is_semicolon_terminated);
23357                         }
23358                         else {
23359
23360                             # kill chain
23361                             $line_1 = undef;
23362                         }
23363                     }
23364                     elsif ( $lev < $lev_last ) {
23365
23366                         # chain ends with previous line
23367                         $line_2 = $line - 1;
23368                     }
23369                     elsif ( $lev > $lev_last ) {
23370
23371                         # kill chain
23372                         $line_1 = undef;
23373                     }
23374
23375                     # undo the continuation indentation if a chain ends
23376                     if ( defined($line_2) && defined($line_1) ) {
23377                         my $continuation_line_count = $line_2 - $line_1 + 1;
23378                         @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ]
23379                           = (0) x ($continuation_line_count)
23380                           if ( $continuation_line_count >= 0 );
23381                         @leading_spaces_to_go[ @{$ri_first}
23382                           [ $line_1 .. $line_2 ] ] =
23383                           @reduced_spaces_to_go[ @{$ri_first}
23384                           [ $line_1 .. $line_2 ] ];
23385                         $line_1 = undef;
23386                     }
23387                 }
23388
23389                 # not in a chain yet..
23390                 else {
23391
23392                     # look for start of a new sort/map/grep chain
23393                     if ( $lev > $lev_last ) {
23394                         if (   $types_to_go[$ibeg] eq 'k'
23395                             && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
23396                         {
23397                             $line_1 = $line;
23398                         }
23399                     }
23400                 }
23401             }
23402
23403             #-------------------------------------
23404             # SECTION 2: Undo ci at cuddled blocks
23405             #-------------------------------------
23406
23407             # Note that sub final_indentation_adjustment will be called later to
23408             # actually do this, but for now we will tentatively mark cuddled
23409             # lines with ci=0 so that the the -xci loop which follows will be
23410             # correct at cuddles.
23411             if (
23412                 $types_to_go[$ibeg] eq '}'
23413                 && ( $nesting_depth_to_go[$iend] + 1 ==
23414                     $nesting_depth_to_go[$ibeg] )
23415               )
23416             {
23417                 my $terminal_type = $types_to_go[$iend];
23418                 if ( $terminal_type eq '#' && $iend > $ibeg ) {
23419                     $terminal_type = $types_to_go[ $iend - 1 ];
23420                     if ( $terminal_type eq '#' && $iend - 1 > $ibeg ) {
23421                         $terminal_type = $types_to_go[ $iend - 2 ];
23422                     }
23423                 }
23424                 if ( $terminal_type eq '{' ) {
23425                     my $Kbeg = $K_to_go[$ibeg];
23426                     $ci_levels_to_go[$ibeg] = 0;
23427                 }
23428             }
23429
23430             #--------------------------------------------------------
23431             # SECTION 3: Undo ci set by sub extended_ci if not needed
23432             #--------------------------------------------------------
23433
23434             # Undo the ci of the leading token if its controlling token
23435             # went out on a previous line without ci
23436             if ( $ci_levels_to_go[$ibeg] ) {
23437                 my $Kbeg  = $K_to_go[$ibeg];
23438                 my $seqno = $rseqno_controlling_my_ci->{$Kbeg};
23439                 if ( $seqno && $undo_extended_ci{$seqno} ) {
23440
23441                     # but do not undo ci set by the -lp flag
23442                     if ( !ref( $reduced_spaces_to_go[$ibeg] ) ) {
23443                         $ci_levels_to_go[$ibeg] = 0;
23444                         $leading_spaces_to_go[$ibeg] =
23445                           $reduced_spaces_to_go[$ibeg];
23446                     }
23447                 }
23448             }
23449
23450             # Flag any controlling opening tokens in lines without ci.  This
23451             # will be used later in the above if statement to undo the ci which
23452             # they added.  The array i_controlling_ci[$line] was prepared at
23453             # the top of this routine.
23454             if ( !$ci_levels_to_go[$ibeg]
23455                 && defined( $i_controlling_ci[$line] ) )
23456             {
23457                 foreach my $i ( @{ $i_controlling_ci[$line] } ) {
23458                     my $seqno = $type_sequence_to_go[$i];
23459                     $undo_extended_ci{$seqno} = 1;
23460                 }
23461             }
23462
23463             $lev_last = $lev;
23464         }
23465
23466         return;
23467     } ## end sub undo_ci
23468 }
23469
23470 {    ## begin closure set_logical_padding
23471     my %is_math_op;
23472
23473     BEGIN {
23474
23475         my @q = qw( + - * / );
23476         @is_math_op{@q} = (1) x scalar(@q);
23477     }
23478
23479     sub set_logical_padding {
23480
23481         # Look at a batch of lines and see if extra padding can improve the
23482         # alignment when there are certain leading operators. Here is an
23483         # example, in which some extra space is introduced before
23484         # '( $year' to make it line up with the subsequent lines:
23485         #
23486         #       if (   ( $Year < 1601 )
23487         #           || ( $Year > 2899 )
23488         #           || ( $EndYear < 1601 )
23489         #           || ( $EndYear > 2899 ) )
23490         #       {
23491         #           &Error_OutOfRange;
23492         #       }
23493         #
23494         my ( $self, $ri_first, $ri_last, $peak_batch_size, $starting_in_quote )
23495           = @_;
23496         my $max_line = @{$ri_first} - 1;
23497
23498         my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
23499             $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
23500
23501         # Patch to produce padding in the first line of short code blocks.
23502         # This is part of an update to fix cases b562 .. b983.
23503         # This is needed to compensate for a change which was made in 'sub
23504         # starting_one_line_block' to prevent blinkers.  Previously, that sub
23505         # would not look at the total block size and rely on sub
23506         # break_long_lines to break up long blocks. Consequently, the
23507         # first line of those batches would end in the opening block brace of a
23508         # sort/map/grep/eval block.  When this was changed to immediately check
23509         # for blocks which were too long, the opening block brace would go out
23510         # in a single batch, and the block contents would go out as the next
23511         # batch.  This caused the logic in this routine which decides if the
23512         # first line should be padded to be incorrect.  To fix this, we set a
23513         # flag if the previous batch ended in an opening sort/map/grep/eval
23514         # block brace, and use it to adjust the logic to compensate.
23515
23516         # For example, the following would have previously been a single batch
23517         # but now is two batches.  We want to pad the line starting in '$dir':
23518         #    my (@indices) =                      # batch n-1  (prev batch n)
23519         #      sort {                             # batch n-1  (prev batch n)
23520         #            $dir eq 'left'               # batch n
23521         #          ? $cells[$a] <=> $cells[$b]    # batch n
23522         #          : $cells[$b] <=> $cells[$a];   # batch n
23523         #      } ( 0 .. $#cells );                # batch n
23524
23525         my $rLL                  = $self->[_rLL_];
23526         my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
23527
23528         my $is_short_block;
23529         if ( $K_to_go[0] > 0 ) {
23530             my $Kp = $K_to_go[0] - 1;
23531             if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq 'b' ) {
23532                 $Kp -= 1;
23533             }
23534             if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
23535                 $Kp -= 1;
23536                 if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq 'b' ) {
23537                     $Kp -= 1;
23538                 }
23539             }
23540             my $seqno = $rLL->[$Kp]->[_TYPE_SEQUENCE_];
23541             if ($seqno) {
23542                 my $block_type = $rblock_type_of_seqno->{$seqno};
23543                 if ($block_type) {
23544                     $is_short_block = $is_sort_map_grep_eval{$block_type};
23545                     $is_short_block ||= $want_one_line_block{$block_type};
23546                 }
23547             }
23548         }
23549
23550         # looking at each line of this batch..
23551         foreach my $line ( 0 .. $max_line - 1 ) {
23552
23553             # see if the next line begins with a logical operator
23554             $ibeg      = $ri_first->[$line];
23555             $iend      = $ri_last->[$line];
23556             $ibeg_next = $ri_first->[ $line + 1 ];
23557             $tok_next  = $tokens_to_go[$ibeg_next];
23558             $type_next = $types_to_go[$ibeg_next];
23559
23560             $has_leading_op_next = ( $tok_next =~ /^\w/ )
23561               ? $is_chain_operator{$tok_next}      # + - * / : ? && ||
23562               : $is_chain_operator{$type_next};    # and, or
23563
23564             next unless ($has_leading_op_next);
23565
23566             # next line must not be at lesser depth
23567             next
23568               if ( $nesting_depth_to_go[$ibeg] >
23569                 $nesting_depth_to_go[$ibeg_next] );
23570
23571             # identify the token in this line to be padded on the left
23572             $ipad = undef;
23573
23574             # handle lines at same depth...
23575             if ( $nesting_depth_to_go[$ibeg] ==
23576                 $nesting_depth_to_go[$ibeg_next] )
23577             {
23578
23579                 # if this is not first line of the batch ...
23580                 if ( $line > 0 ) {
23581
23582                     # and we have leading operator..
23583                     next if $has_leading_op;
23584
23585                     # Introduce padding if..
23586                     # 1. the previous line is at lesser depth, or
23587                     # 2. the previous line ends in an assignment
23588                     # 3. the previous line ends in a 'return'
23589                     # 4. the previous line ends in a comma
23590                     # Example 1: previous line at lesser depth
23591                     #       if (   ( $Year < 1601 )      # <- we are here but
23592                     #           || ( $Year > 2899 )      #  list has not yet
23593                     #           || ( $EndYear < 1601 )   # collapsed vertically
23594                     #           || ( $EndYear > 2899 ) )
23595                     #       {
23596                     #
23597                     # Example 2: previous line ending in assignment:
23598                     #    $leapyear =
23599                     #        $year % 4   ? 0     # <- We are here
23600                     #      : $year % 100 ? 1
23601                     #      : $year % 400 ? 0
23602                     #      : 1;
23603                     #
23604                     # Example 3: previous line ending in comma:
23605                     #    push @expr,
23606                     #        /test/   ? undef
23607                     #      : eval($_) ? 1
23608                     #      : eval($_) ? 1
23609                     #      :            0;
23610
23611                     # be sure levels agree (never indent after an indented 'if')
23612                     next
23613                       if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
23614
23615                     # allow padding on first line after a comma but only if:
23616                     # (1) this is line 2 and
23617                     # (2) there are at more than three lines and
23618                     # (3) lines 3 and 4 have the same leading operator
23619                     # These rules try to prevent padding within a long
23620                     # comma-separated list.
23621                     my $ok_comma;
23622                     if (   $types_to_go[$iendm] eq ','
23623                         && $line == 1
23624                         && $max_line > 2 )
23625                     {
23626                         my $ibeg_next_next = $ri_first->[ $line + 2 ];
23627                         my $tok_next_next  = $tokens_to_go[$ibeg_next_next];
23628                         $ok_comma = $tok_next_next eq $tok_next;
23629                     }
23630
23631                     next
23632                       unless (
23633                            $is_assignment{ $types_to_go[$iendm] }
23634                         || $ok_comma
23635                         || ( $nesting_depth_to_go[$ibegm] <
23636                             $nesting_depth_to_go[$ibeg] )
23637                         || (   $types_to_go[$iendm] eq 'k'
23638                             && $tokens_to_go[$iendm] eq 'return' )
23639                       );
23640
23641                     # we will add padding before the first token
23642                     $ipad = $ibeg;
23643                 }
23644
23645                 # for first line of the batch..
23646                 else {
23647
23648                     # WARNING: Never indent if first line is starting in a
23649                     # continued quote, which would change the quote.
23650                     next if $starting_in_quote;
23651
23652                     # if this is text after closing '}'
23653                     # then look for an interior token to pad
23654                     if ( $types_to_go[$ibeg] eq '}' ) {
23655
23656                     }
23657
23658                     # otherwise, we might pad if it looks really good
23659                     elsif ($is_short_block) {
23660                         $ipad = $ibeg;
23661                     }
23662                     else {
23663
23664                         # we might pad token $ibeg, so be sure that it
23665                         # is at the same depth as the next line.
23666                         next
23667                           if ( $nesting_depth_to_go[$ibeg] !=
23668                             $nesting_depth_to_go[$ibeg_next] );
23669
23670                         # We can pad on line 1 of a statement if at least 3
23671                         # lines will be aligned. Otherwise, it
23672                         # can look very confusing.
23673
23674                  # We have to be careful not to pad if there are too few
23675                  # lines.  The current rule is:
23676                  # (1) in general we require at least 3 consecutive lines
23677                  # with the same leading chain operator token,
23678                  # (2) but an exception is that we only require two lines
23679                  # with leading colons if there are no more lines.  For example,
23680                  # the first $i in the following snippet would get padding
23681                  # by the second rule:
23682                  #
23683                  #   $i == 1 ? ( "First", "Color" )
23684                  # : $i == 2 ? ( "Then",  "Rarity" )
23685                  # :           ( "Then",  "Name" );
23686
23687                         if ( $max_line > 1 ) {
23688                             my $leading_token = $tokens_to_go[$ibeg_next];
23689                             my $tokens_differ;
23690
23691                             # never indent line 1 of a '.' series because
23692                             # previous line is most likely at same level.
23693                             # TODO: we should also look at the leading_spaces
23694                             # of the last output line and skip if it is same
23695                             # as this line.
23696                             next if ( $leading_token eq '.' );
23697
23698                             my $count = 1;
23699                             foreach my $l ( 2 .. 3 ) {
23700                                 last if ( $line + $l > $max_line );
23701                                 my $ibeg_next_next = $ri_first->[ $line + $l ];
23702                                 if ( $tokens_to_go[$ibeg_next_next] ne
23703                                     $leading_token )
23704                                 {
23705                                     $tokens_differ = 1;
23706                                     last;
23707                                 }
23708                                 $count++;
23709                             }
23710                             next if ($tokens_differ);
23711                             next if ( $count < 3 && $leading_token ne ':' );
23712                             $ipad = $ibeg;
23713                         }
23714                         else {
23715                             next;
23716                         }
23717                     }
23718                 }
23719             }
23720
23721             # find interior token to pad if necessary
23722             if ( !defined($ipad) ) {
23723
23724                 foreach my $i ( $ibeg .. $iend - 1 ) {
23725
23726                     # find any unclosed container
23727                     next
23728                       unless ( $type_sequence_to_go[$i]
23729                         && $mate_index_to_go[$i] > $iend );
23730
23731                     # find next nonblank token to pad
23732                     $ipad = $inext_to_go[$i];
23733                     last if $ipad;
23734                 }
23735                 last if ( !$ipad || $ipad > $iend );
23736             }
23737
23738             # We cannot pad the first leading token of a file because
23739             # it could cause a bug in which the starting indentation
23740             # level is guessed incorrectly each time the code is run
23741             # though perltidy, thus causing the code to march off to
23742             # the right.  For example, the following snippet would have
23743             # this problem:
23744
23745 ##     ov_method mycan( $package, '(""' ),       $package
23746 ##  or ov_method mycan( $package, '(0+' ),       $package
23747 ##  or ov_method mycan( $package, '(bool' ),     $package
23748 ##  or ov_method mycan( $package, '(nomethod' ), $package;
23749
23750             # If this snippet is within a block this won't happen
23751             # unless the user just processes the snippet alone within
23752             # an editor.  In that case either the user will see and
23753             # fix the problem or it will be corrected next time the
23754             # entire file is processed with perltidy.
23755             next if ( $ipad == 0 && $peak_batch_size <= 1 );
23756
23757 ## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
23758 ## IT DID MORE HARM THAN GOOD
23759 ##            ceil(
23760 ##                      $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
23761 ##                    / $upem
23762 ##            ),
23763 ##            # do not put leading padding for just 2 lines of math
23764 ##            if (   $ipad == $ibeg
23765 ##                && $line > 0
23766 ##                && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
23767 ##                && $is_math_op{$type_next}
23768 ##                && $line + 2 <= $max_line )
23769 ##            {
23770 ##                my $ibeg_next_next = $ri_first->[ $line + 2 ];
23771 ##                my $type_next_next = $types_to_go[$ibeg_next_next];
23772 ##                next if !$is_math_op{$type_next_next};
23773 ##            }
23774
23775             # next line must not be at greater depth
23776             my $iend_next = $ri_last->[ $line + 1 ];
23777             next
23778               if ( $nesting_depth_to_go[ $iend_next + 1 ] >
23779                 $nesting_depth_to_go[$ipad] );
23780
23781             # lines must be somewhat similar to be padded..
23782             my $inext_next = $inext_to_go[$ibeg_next];
23783             my $type       = $types_to_go[$ipad];
23784
23785             # see if there are multiple continuation lines
23786             my $logical_continuation_lines = 1;
23787             if ( $line + 2 <= $max_line ) {
23788                 my $leading_token  = $tokens_to_go[$ibeg_next];
23789                 my $ibeg_next_next = $ri_first->[ $line + 2 ];
23790                 if (   $tokens_to_go[$ibeg_next_next] eq $leading_token
23791                     && $nesting_depth_to_go[$ibeg_next] eq
23792                     $nesting_depth_to_go[$ibeg_next_next] )
23793                 {
23794                     $logical_continuation_lines++;
23795                 }
23796             }
23797
23798             # see if leading types match
23799             my $types_match = $types_to_go[$inext_next] eq $type;
23800             my $matches_without_bang;
23801
23802             # if first line has leading ! then compare the following token
23803             if ( !$types_match && $type eq '!' ) {
23804                 $types_match = $matches_without_bang =
23805                   $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
23806             }
23807             if (
23808
23809                 # either we have multiple continuation lines to follow
23810                 # and we are not padding the first token
23811                 (
23812                     $logical_continuation_lines > 1
23813                     && ( $ipad > 0 || $is_short_block )
23814                 )
23815
23816                 # or..
23817                 || (
23818
23819                     # types must match
23820                     $types_match
23821
23822                     # and keywords must match if keyword
23823                     && !(
23824                            $type eq 'k'
23825                         && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
23826                     )
23827                 )
23828               )
23829             {
23830
23831                 #----------------------begin special checks--------------
23832                 #
23833                 # SPECIAL CHECK 1:
23834                 # A check is needed before we can make the pad.
23835                 # If we are in a list with some long items, we want each
23836                 # item to stand out.  So in the following example, the
23837                 # first line beginning with '$casefold->' would look good
23838                 # padded to align with the next line, but then it
23839                 # would be indented more than the last line, so we
23840                 # won't do it.
23841                 #
23842                 #  ok(
23843                 #      $casefold->{code}         eq '0041'
23844                 #        && $casefold->{status}  eq 'C'
23845                 #        && $casefold->{mapping} eq '0061',
23846                 #      'casefold 0x41'
23847                 #  );
23848                 #
23849                 # Note:
23850                 # It would be faster, and almost as good, to use a comma
23851                 # count, and not pad if comma_count > 1 and the previous
23852                 # line did not end with a comma.
23853                 #
23854                 my $ok_to_pad = 1;
23855
23856                 my $ibg   = $ri_first->[ $line + 1 ];
23857                 my $depth = $nesting_depth_to_go[ $ibg + 1 ];
23858
23859                 # just use simplified formula for leading spaces to avoid
23860                 # needless sub calls
23861                 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
23862
23863                 # look at each line beyond the next ..
23864                 my $l = $line + 1;
23865                 foreach my $ltest ( $line + 2 .. $max_line ) {
23866                     $l = $ltest;
23867                     my $ibeg_t = $ri_first->[$l];
23868
23869                     # quit looking at the end of this container
23870                     last
23871                       if ( $nesting_depth_to_go[ $ibeg_t + 1 ] < $depth )
23872                       || ( $nesting_depth_to_go[$ibeg_t] < $depth );
23873
23874                     # cannot do the pad if a later line would be
23875                     # outdented more
23876                     if ( $levels_to_go[$ibeg_t] + $ci_levels_to_go[$ibeg_t] <
23877                         $lsp )
23878                     {
23879                         $ok_to_pad = 0;
23880                         last;
23881                     }
23882                 }
23883
23884                 # don't pad if we end in a broken list
23885                 if ( $l == $max_line ) {
23886                     my $i2 = $ri_last->[$l];
23887                     if ( $types_to_go[$i2] eq '#' ) {
23888                         my $i1 = $ri_first->[$l];
23889                         next if terminal_type_i( $i1, $i2 ) eq ',';
23890                     }
23891                 }
23892
23893                 # SPECIAL CHECK 2:
23894                 # a minus may introduce a quoted variable, and we will
23895                 # add the pad only if this line begins with a bare word,
23896                 # such as for the word 'Button' here:
23897                 #    [
23898                 #         Button      => "Print letter \"~$_\"",
23899                 #        -command     => [ sub { print "$_[0]\n" }, $_ ],
23900                 #        -accelerator => "Meta+$_"
23901                 #    ];
23902                 #
23903                 #  On the other hand, if 'Button' is quoted, it looks best
23904                 #  not to pad:
23905                 #    [
23906                 #        'Button'     => "Print letter \"~$_\"",
23907                 #        -command     => [ sub { print "$_[0]\n" }, $_ ],
23908                 #        -accelerator => "Meta+$_"
23909                 #    ];
23910                 if ( $types_to_go[$ibeg_next] eq 'm' ) {
23911                     $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
23912                 }
23913
23914                 next unless $ok_to_pad;
23915
23916                 #----------------------end special check---------------
23917
23918                 my $length_1 = total_line_length( $ibeg,      $ipad - 1 );
23919                 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
23920                 $pad_spaces = $length_2 - $length_1;
23921
23922                 # If the first line has a leading ! and the second does
23923                 # not, then remove one space to try to align the next
23924                 # leading characters, which are often the same.  For example:
23925                 #  if (  !$ts
23926                 #      || $ts == $self->Holder
23927                 #      || $self->Holder->Type eq "Arena" )
23928                 #
23929                 # This usually helps readability, but if there are subsequent
23930                 # ! operators things will still get messed up.  For example:
23931                 #
23932                 #  if (  !exists $Net::DNS::typesbyname{$qtype}
23933                 #      && exists $Net::DNS::classesbyname{$qtype}
23934                 #      && !exists $Net::DNS::classesbyname{$qclass}
23935                 #      && exists $Net::DNS::typesbyname{$qclass} )
23936                 # We can't fix that.
23937                 if ($matches_without_bang) { $pad_spaces-- }
23938
23939                 # make sure this won't change if -lp is used
23940                 my $indentation_1 = $leading_spaces_to_go[$ibeg];
23941                 if ( ref($indentation_1)
23942                     && $indentation_1->get_recoverable_spaces() == 0 )
23943                 {
23944                     my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
23945                     if ( ref($indentation_2)
23946                         && $indentation_2->get_recoverable_spaces() != 0 )
23947                     {
23948                         $pad_spaces = 0;
23949                     }
23950                 }
23951
23952                 # we might be able to handle a pad of -1 by removing a blank
23953                 # token
23954                 if ( $pad_spaces < 0 ) {
23955
23956                     # Deactivated for -kpit due to conflict. This block deletes
23957                     # a space in an attempt to improve alignment in some cases,
23958                     # but it may conflict with user spacing requests.  For now
23959                     # it is just deactivated if the -kpit option is used.
23960                     if ( $pad_spaces == -1 ) {
23961                         if (   $ipad > $ibeg
23962                             && $types_to_go[ $ipad - 1 ] eq 'b'
23963                             && !%keyword_paren_inner_tightness )
23964                         {
23965                             $self->pad_token( $ipad - 1, $pad_spaces );
23966                         }
23967                     }
23968                     $pad_spaces = 0;
23969                 }
23970
23971                 # now apply any padding for alignment
23972                 if ( $ipad >= 0 && $pad_spaces ) {
23973
23974                     my $length_t = total_line_length( $ibeg, $iend );
23975                     if ( $pad_spaces + $length_t <=
23976                         $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] )
23977                     {
23978                         $self->pad_token( $ipad, $pad_spaces );
23979                     }
23980                 }
23981             }
23982         }
23983         continue {
23984             $iendm          = $iend;
23985             $ibegm          = $ibeg;
23986             $has_leading_op = $has_leading_op_next;
23987         } ## end of loop over lines
23988         return;
23989     } ## end sub set_logical_padding
23990 } ## end closure set_logical_padding
23991
23992 sub pad_token {
23993
23994     # insert $pad_spaces before token number $ipad
23995     my ( $self, $ipad, $pad_spaces ) = @_;
23996     my $rLL     = $self->[_rLL_];
23997     my $KK      = $K_to_go[$ipad];
23998     my $tok     = $rLL->[$KK]->[_TOKEN_];
23999     my $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_];
24000
24001     if ( $pad_spaces > 0 ) {
24002         $tok = SPACE x $pad_spaces . $tok;
24003         $tok_len += $pad_spaces;
24004     }
24005     elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq SPACE ) {
24006         $tok     = EMPTY_STRING;
24007         $tok_len = 0;
24008     }
24009     else {
24010
24011         # shouldn't happen
24012         return;
24013     }
24014
24015     $tok     = $rLL->[$KK]->[_TOKEN_]        = $tok;
24016     $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
24017
24018     $token_lengths_to_go[$ipad] += $pad_spaces;
24019     $tokens_to_go[$ipad] = $tok;
24020
24021     foreach my $i ( $ipad .. $max_index_to_go ) {
24022         $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
24023     }
24024     return;
24025 } ## end sub pad_token
24026
24027 {    ## begin closure make_alignment_patterns
24028
24029     my %keyword_map;
24030     my %operator_map;
24031     my %is_w_n_C;
24032     my %is_my_local_our;
24033     my %is_kwU;
24034     my %is_use_like;
24035     my %is_binary_type;
24036     my %is_binary_keyword;
24037     my %name_map;
24038
24039     BEGIN {
24040
24041         # Note: %block_type_map is now global to enable the -gal=s option
24042
24043         # map certain keywords to the same 'if' class to align
24044         # long if/elsif sequences. [elsif.pl]
24045         %keyword_map = (
24046             'unless'  => 'if',
24047             'else'    => 'if',
24048             'elsif'   => 'if',
24049             'when'    => 'given',
24050             'default' => 'given',
24051             'case'    => 'switch',
24052
24053             # treat an 'undef' similar to numbers and quotes
24054             'undef' => 'Q',
24055         );
24056
24057         # map certain operators to the same class for pattern matching
24058         %operator_map = (
24059             '!~' => '=~',
24060             '+=' => '+=',
24061             '-=' => '+=',
24062             '*=' => '+=',
24063             '/=' => '+=',
24064         );
24065
24066         %is_w_n_C = (
24067             'w' => 1,
24068             'n' => 1,
24069             'C' => 1,
24070         );
24071
24072         # leading keywords which to skip for efficiency when making parenless
24073         # container names
24074         my @q = qw( my local our return );
24075         @{is_my_local_our}{@q} = (1) x scalar(@q);
24076
24077         # leading keywords where we should just join one token to form
24078         # parenless name
24079         @q = qw( use );
24080         @{is_use_like}{@q} = (1) x scalar(@q);
24081
24082         # leading token types which may be used to make a container name
24083         @q = qw( k w U );
24084         @{is_kwU}{@q} = (1) x scalar(@q);
24085
24086         # token types which prevent using leading word as a container name
24087         @q = qw(
24088           x / : % . | ^ < = > || >= != *= => !~ == && |= .= -= =~ += <= %= ^= x= ~~ ** << /=
24089           &= // >> ~. &. |. ^.
24090           **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~
24091         );
24092         push @q, ',';
24093         @{is_binary_type}{@q} = (1) x scalar(@q);
24094
24095         # token keywords which prevent using leading word as a container name
24096         @_ = qw(and or err eq ne cmp);
24097         @is_binary_keyword{@_} = (1) x scalar(@_);
24098
24099         # Some common function calls whose args can be aligned.  These do not
24100         # give good alignments if the lengths differ significantly.
24101         %name_map = (
24102             'unlike' => 'like',
24103             'isnt'   => 'is',
24104             ##'is_deeply' => 'is',   # poor; names lengths too different
24105         );
24106
24107     }
24108
24109     sub make_alignment_patterns {
24110
24111         # Here we do some important preliminary work for the
24112         # vertical aligner.  We create four arrays for one
24113         # output line. These arrays contain strings that can
24114         # be tested by the vertical aligner to see if
24115         # consecutive lines can be aligned vertically.
24116         #
24117         # The four arrays are indexed on the vertical
24118         # alignment fields and are:
24119         # @tokens - a list of any vertical alignment tokens for this line.
24120         #   These are tokens, such as '=' '&&' '#' etc which
24121         #   we want to might align vertically.  These are
24122         #   decorated with various information such as
24123         #   nesting depth to prevent unwanted vertical
24124         #   alignment matches.
24125         # @fields - the actual text of the line between the vertical alignment
24126         #   tokens.
24127         # @patterns - a modified list of token types, one for each alignment
24128         #   field.  These should normally each match before alignment is
24129         #   allowed, even when the alignment tokens match.
24130         # @field_lengths - the display width of each field
24131
24132         my ( $self, $ibeg, $iend, $ralignment_type_to_go, $alignment_count,
24133             $ralignment_hash )
24134           = @_;
24135
24136         # The var $ralignment_hash contains all of the alignments for this
24137         # line.  It is not yet used but is available for future coding in case
24138         # there is a need to do a preliminary scan of the alignment tokens.
24139         if (DEVEL_MODE) {
24140             my $new_count = 0;
24141             if ( defined($ralignment_hash) ) {
24142                 $new_count = keys %{$ralignment_hash};
24143             }
24144             my $old_count = $alignment_count;
24145             $old_count = 0 unless ($old_count);
24146             if ( $new_count != $old_count ) {
24147                 my $K   = $K_to_go[$ibeg];
24148                 my $rLL = $self->[_rLL_];
24149                 my $lnl = $rLL->[$K]->[_LINE_INDEX_];
24150                 Fault(
24151 "alignment hash token count gives count=$new_count but old count is $old_count near line=$lnl\n"
24152                 );
24153             }
24154         }
24155
24156         # -------------------------------------
24157         # Shortcut for lines without alignments
24158         # -------------------------------------
24159         if ( !$alignment_count ) {
24160             my $rtokens        = [];
24161             my $rfield_lengths = [ $summed_lengths_to_go[ $iend + 1 ] -
24162                   $summed_lengths_to_go[$ibeg] ];
24163             my $rpatterns;
24164             my $rfields;
24165             if ( $ibeg == $iend ) {
24166                 $rfields   = [ $tokens_to_go[$ibeg] ];
24167                 $rpatterns = [ $types_to_go[$ibeg] ];
24168             }
24169             else {
24170                 $rfields =
24171                   [ join( EMPTY_STRING, @tokens_to_go[ $ibeg .. $iend ] ) ];
24172                 $rpatterns =
24173                   [ join( EMPTY_STRING, @types_to_go[ $ibeg .. $iend ] ) ];
24174             }
24175             return [ $rtokens, $rfields, $rpatterns, $rfield_lengths ];
24176         }
24177
24178         my $i_start        = $ibeg;
24179         my $depth          = 0;
24180         my %container_name = ( 0 => EMPTY_STRING );
24181
24182         my @tokens        = ();
24183         my @fields        = ();
24184         my @patterns      = ();
24185         my @field_lengths = ();
24186
24187         #-------------------------------------------------------------
24188         # Make a container name for any uncontained commas, issue c089
24189         #-------------------------------------------------------------
24190         # This is a generalization of the fix for rt136416 which was a
24191         # specialized patch just for 'use Module' statements.
24192         # We restrict this to semicolon-terminated statements; that way
24193         # we know that the top level commas are not in a list container.
24194         if ( $ibeg == 0 && $iend == $max_index_to_go ) {
24195             my $iterm = $max_index_to_go;
24196             if ( $types_to_go[$iterm] eq '#' ) {
24197                 $iterm = $iprev_to_go[$iterm];
24198             }
24199
24200             # Alignment lines ending like '=> sub {';  fixes issue c093
24201             my $term_type_ok = $types_to_go[$iterm] eq ';';
24202             $term_type_ok ||=
24203               $tokens_to_go[$iterm] eq '{' && $block_type_to_go[$iterm];
24204
24205             if (   $iterm > $ibeg
24206                 && $term_type_ok
24207                 && !$is_my_local_our{ $tokens_to_go[$ibeg] }
24208                 && $levels_to_go[$ibeg] eq $levels_to_go[$iterm] )
24209             {
24210
24211                 # Make a container name by combining all leading barewords,
24212                 # keywords and functions.
24213                 my $name  = EMPTY_STRING;
24214                 my $count = 0;
24215                 my $count_max;
24216                 my $iname_end;
24217                 my $ilast_blank;
24218                 for ( $ibeg .. $iterm ) {
24219                     my $type = $types_to_go[$_];
24220
24221                     if ( $type eq 'b' ) {
24222                         $ilast_blank = $_;
24223                         next;
24224                     }
24225
24226                     my $token = $tokens_to_go[$_];
24227
24228                     # Give up if we find an opening paren, binary operator or
24229                     # comma within or after the proposed container name.
24230                     if (   $token eq '('
24231                         || $is_binary_type{$type}
24232                         || $type eq 'k' && $is_binary_keyword{$token} )
24233                     {
24234                         $name = EMPTY_STRING;
24235                         last;
24236                     }
24237
24238                     # The container name is only built of certain types:
24239                     last if ( !$is_kwU{$type} );
24240
24241                     # Normally it is made of one word, but two words for 'use'
24242                     if ( $count == 0 ) {
24243                         if (   $type eq 'k'
24244                             && $is_use_like{ $tokens_to_go[$_] } )
24245                         {
24246                             $count_max = 2;
24247                         }
24248                         else {
24249                             $count_max = 1;
24250                         }
24251                     }
24252                     elsif ( defined($count_max) && $count >= $count_max ) {
24253                         last;
24254                     }
24255
24256                     if ( defined( $name_map{$token} ) ) {
24257                         $token = $name_map{$token};
24258                     }
24259
24260                     $name .= SPACE . $token;
24261                     $iname_end = $_;
24262                     $count++;
24263                 }
24264
24265                 # Require a space after the container name token(s)
24266                 if (   $name
24267                     && defined($ilast_blank)
24268                     && $ilast_blank > $iname_end )
24269                 {
24270                     $name = substr( $name, 1 );
24271                     $container_name{'0'} = $name;
24272                 }
24273             }
24274         }
24275
24276         # --------------------
24277         # Loop over all tokens
24278         # --------------------
24279         my $j = 0;    # field index
24280
24281         $patterns[0] = EMPTY_STRING;
24282         my %token_count;
24283         for my $i ( $ibeg .. $iend ) {
24284
24285             # Keep track of containers balanced on this line only.
24286             # These are used below to prevent unwanted cross-line alignments.
24287             # Unbalanced containers already avoid aligning across
24288             # container boundaries.
24289
24290             my $type       = $types_to_go[$i];
24291             my $token      = $tokens_to_go[$i];
24292             my $depth_last = $depth;
24293             if ( $type_sequence_to_go[$i] ) {
24294                 if ( $is_opening_token{$token} ) {
24295
24296                     # if container is balanced on this line...
24297                     my $i_mate = $mate_index_to_go[$i];
24298                     if ( $i_mate > $i && $i_mate <= $iend ) {
24299                         $depth++;
24300
24301                      # Append the previous token name to make the container name
24302                      # more unique.  This name will also be given to any commas
24303                      # within this container, and it helps avoid undesirable
24304                      # alignments of different types of containers.
24305
24306                      # Containers beginning with { and [ are given those names
24307                      # for uniqueness. That way commas in different containers
24308                      # will not match. Here is an example of what this prevents:
24309                      #   a => [ 1,       2, 3 ],
24310                      #   b => { b1 => 4, b2 => 5 },
24311                      # Here is another example of what we avoid by labeling the
24312                      # commas properly:
24313
24314                    # is_d( [ $a,        $a ], [ $b,               $c ] );
24315                    # is_d( { foo => $a, bar => $a }, { foo => $b, bar => $c } );
24316                    # is_d( [ \$a,       \$a ], [ \$b,             \$c ] );
24317
24318                         my $name = $token;
24319                         if ( $token eq '(' ) {
24320                             $name = $self->make_paren_name($i);
24321                         }
24322
24323                         # name cannot be '.', so change to something else if so
24324                         if ( $name eq '.' ) { $name = 'dot' }
24325
24326                         $container_name{$depth} = "+" . $name;
24327
24328                         # Make the container name even more unique if necessary.
24329                         # If we are not vertically aligning this opening paren,
24330                         # append a character count to avoid bad alignment since
24331                         # it usually looks bad to align commas within containers
24332                         # for which the opening parens do not align.  Here
24333                         # is an example very BAD alignment of commas (because
24334                         # the atan2 functions are not all aligned):
24335                         #    $XY =
24336                         #      $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
24337                         #      $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
24338                         #      $X * atan2( $X,            1 ) -
24339                         #      $Y * atan2( $Y,            1 );
24340                         #
24341                         # On the other hand, it is usually okay to align commas
24342                         # if opening parens align, such as:
24343                         #    glVertex3d( $cx + $s * $xs, $cy,            $z );
24344                         #    glVertex3d( $cx,            $cy + $s * $ys, $z );
24345                         #    glVertex3d( $cx - $s * $xs, $cy,            $z );
24346                         #    glVertex3d( $cx,            $cy - $s * $ys, $z );
24347                         #
24348                         # To distinguish between these situations, we append
24349                         # the length of the line from the previous matching
24350                         # token, or beginning of line, to the function name.
24351                         # This will allow the vertical aligner to reject
24352                         # undesirable matches.
24353
24354                         # if we are not aligning on this paren...
24355                         if ( !$ralignment_type_to_go->[$i] ) {
24356
24357                             # Sum length from previous alignment
24358                             my $len = token_sequence_length( $i_start, $i - 1 );
24359
24360                             # Minor patch: do not include the length of any '!'.
24361                             # Otherwise, commas in the following line will not
24362                             # match
24363                             #  ok( 20, tapprox( ( pdl 2,  3 ), ( pdl 2, 3 ) ) );
24364                             #  ok( 21, !tapprox( ( pdl 2, 3 ), ( pdl 2, 4 ) ) );
24365                             if ( grep { $_ eq '!' }
24366                                 @types_to_go[ $i_start .. $i - 1 ] )
24367                             {
24368                                 $len -= 1;
24369                             }
24370
24371                             if ( $i_start == $ibeg ) {
24372
24373                                 # For first token, use distance from start of
24374                                 # line but subtract off the indentation due to
24375                                 # level.  Otherwise, results could vary with
24376                                 # indentation.
24377                                 $len +=
24378                                   leading_spaces_to_go($ibeg) -
24379                                   $levels_to_go[$i_start] *
24380                                   $rOpts_indent_columns;
24381                                 if ( $len < 0 ) { $len = 0 }
24382                             }
24383
24384                             # tack this length onto the container name to try
24385                             # to make a unique token name
24386                             $container_name{$depth} .= "-" . $len;
24387                         } ## end if ( !$ralignment_type_to_go...)
24388                     } ## end if ( $i_mate > $i && $i_mate...)
24389                 } ## end if ( $is_opening_token...)
24390
24391                 elsif ( $is_closing_type{$token} ) {
24392                     $depth-- if $depth > 0;
24393                 }
24394             } ## end if ( $type_sequence_to_go...)
24395
24396             # if we find a new synchronization token, we are done with
24397             # a field
24398             if ( $i > $i_start && $ralignment_type_to_go->[$i] ) {
24399
24400                 my $tok = my $raw_tok = $ralignment_type_to_go->[$i];
24401
24402                 # map similar items
24403                 my $tok_map = $operator_map{$tok};
24404                 $tok = $tok_map if ($tok_map);
24405
24406                 # make separators in different nesting depths unique
24407                 # by appending the nesting depth digit.
24408                 if ( $raw_tok ne '#' ) {
24409                     $tok .= "$nesting_depth_to_go[$i]";
24410                 }
24411
24412                 # also decorate commas with any container name to avoid
24413                 # unwanted cross-line alignments.
24414                 if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
24415
24416                   # If we are at an opening token which increased depth, we have
24417                   # to use the name from the previous depth.
24418                     my $depth_p =
24419                       ( $depth_last < $depth ? $depth_last : $depth );
24420                     if ( $container_name{$depth_p} ) {
24421                         $tok .= $container_name{$depth_p};
24422                     }
24423                 }
24424
24425                 # Patch to avoid aligning leading and trailing if, unless.
24426                 # Mark trailing if, unless statements with container names.
24427                 # This makes them different from leading if, unless which
24428                 # are not so marked at present.  If we ever need to name
24429                 # them too, we could use ci to distinguish them.
24430                 # Example problem to avoid:
24431                 #    return ( 2, "DBERROR" )
24432                 #      if ( $retval == 2 );
24433                 #    if   ( scalar @_ ) {
24434                 #        my ( $a, $b, $c, $d, $e, $f ) = @_;
24435                 #    }
24436                 if ( $raw_tok eq '(' ) {
24437                     if (   $ci_levels_to_go[$ibeg]
24438                         && $container_name{$depth} =~ /^\+(if|unless)/ )
24439                     {
24440                         $tok .= $container_name{$depth};
24441                     }
24442                 }
24443
24444                 # Decorate block braces with block types to avoid
24445                 # unwanted alignments such as the following:
24446                 # foreach ( @{$routput_array} ) { $fh->print($_) }
24447                 # eval                          { $fh->close() };
24448                 if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
24449                     my $block_type = $block_type_to_go[$i];
24450
24451                     # map certain related block types to allow
24452                     # else blocks to align
24453                     $block_type = $block_type_map{$block_type}
24454                       if ( defined( $block_type_map{$block_type} ) );
24455
24456                     # remove sub names to allow one-line sub braces to align
24457                     # regardless of name
24458                     if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
24459
24460                     # allow all control-type blocks to align
24461                     if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
24462
24463                     $tok .= $block_type;
24464                 }
24465
24466                 # Mark multiple copies of certain tokens with the copy number
24467                 # This will allow the aligner to decide if they are matched.
24468                 # For now, only do this for equals. For example, the two
24469                 # equals on the next line will be labeled '=0' and '=0.2'.
24470                 # Later, the '=0.2' will be ignored in alignment because it
24471                 # has no match.
24472
24473                 # $|          = $debug = 1 if $opt_d;
24474                 # $full_index = 1          if $opt_i;
24475
24476                 if ( $raw_tok eq '=' || $raw_tok eq '=>' ) {
24477                     $token_count{$tok}++;
24478                     if ( $token_count{$tok} > 1 ) {
24479                         $tok .= '.' . $token_count{$tok};
24480                     }
24481                 }
24482
24483                 # concatenate the text of the consecutive tokens to form
24484                 # the field
24485                 push( @fields,
24486                     join( EMPTY_STRING, @tokens_to_go[ $i_start .. $i - 1 ] ) );
24487
24488                 push @field_lengths,
24489                   $summed_lengths_to_go[$i] - $summed_lengths_to_go[$i_start];
24490
24491                 # store the alignment token for this field
24492                 push( @tokens, $tok );
24493
24494                 # get ready for the next batch
24495                 $i_start = $i;
24496                 $j++;
24497                 $patterns[$j] = EMPTY_STRING;
24498             } ## end if ( new synchronization token
24499
24500             # continue accumulating tokens
24501
24502             # for keywords we have to use the actual text
24503             if ( $type eq 'k' ) {
24504
24505                 my $tok_fix = $tokens_to_go[$i];
24506
24507                 # but map certain keywords to a common string to allow
24508                 # alignment.
24509                 $tok_fix = $keyword_map{$tok_fix}
24510                   if ( defined( $keyword_map{$tok_fix} ) );
24511                 $patterns[$j] .= $tok_fix;
24512             }
24513
24514             elsif ( $type eq 'b' ) {
24515                 $patterns[$j] .= $type;
24516             }
24517
24518             # Mark most things before arrows as a quote to
24519             # get them to line up. Testfile: mixed.pl.
24520
24521             # handle $type =~ /^[wnC]$/
24522             elsif ( $is_w_n_C{$type} ) {
24523
24524                 my $type_fix = $type;
24525
24526                 if ( $i < $iend - 1 ) {
24527                     my $next_type = $types_to_go[ $i + 1 ];
24528                     my $i_next_nonblank =
24529                       ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
24530
24531                     if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
24532                         $type_fix = 'Q';
24533
24534                         # Patch to ignore leading minus before words,
24535                         # by changing pattern 'mQ' into just 'Q',
24536                         # so that we can align things like this:
24537                         #  Button   => "Print letter \"~$_\"",
24538                         #  -command => [ sub { print "$_[0]\n" }, $_ ],
24539                         if ( $patterns[$j] eq 'm' ) {
24540                             $patterns[$j] = EMPTY_STRING;
24541                         }
24542                     }
24543                 }
24544
24545                 # Convert a bareword within braces into a quote for
24546                 # matching.  This will allow alignment of expressions like
24547                 # this:
24548                 #    local ( $SIG{'INT'} ) = IGNORE;
24549                 #    local ( $SIG{ALRM} )  = 'POSTMAN';
24550                 if (   $type eq 'w'
24551                     && $i > $ibeg
24552                     && $i < $iend
24553                     && $types_to_go[ $i - 1 ] eq 'L'
24554                     && $types_to_go[ $i + 1 ] eq 'R' )
24555                 {
24556                     $type_fix = 'Q';
24557                 }
24558
24559                 # patch to make numbers and quotes align
24560                 if ( $type eq 'n' ) { $type_fix = 'Q' }
24561
24562                 $patterns[$j] .= $type_fix;
24563             } ## end elsif ( $is_w_n_C{$type} )
24564
24565             # ignore any ! in patterns
24566             elsif ( $type eq '!' ) { }
24567
24568             # everything else
24569             else {
24570                 $patterns[$j] .= $type;
24571             }
24572
24573             # remove any zero-level name at first fat comma
24574             if ( $depth == 0 && $type eq '=>' ) {
24575                 $container_name{$depth} = EMPTY_STRING;
24576             }
24577         } ## end for my $i ( $ibeg .. $iend)
24578
24579         # done with this line .. join text of tokens to make the last field
24580         push( @fields,
24581             join( EMPTY_STRING, @tokens_to_go[ $i_start .. $iend ] ) );
24582         push @field_lengths,
24583           $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$i_start];
24584
24585         return [ \@tokens, \@fields, \@patterns, \@field_lengths ];
24586     } ## end sub make_alignment_patterns
24587
24588 } ## end closure make_alignment_patterns
24589
24590 sub make_paren_name {
24591     my ( $self, $i ) = @_;
24592
24593     # The token at index $i is a '('.
24594     # Create an alignment name for it to avoid incorrect alignments.
24595
24596     # Start with the name of the previous nonblank token...
24597     my $name = EMPTY_STRING;
24598     my $im   = $i - 1;
24599     return EMPTY_STRING if ( $im < 0 );
24600     if ( $types_to_go[$im] eq 'b' ) { $im--; }
24601     return EMPTY_STRING if ( $im < 0 );
24602     $name = $tokens_to_go[$im];
24603
24604     # Prepend any sub name to an isolated -> to avoid unwanted alignments
24605     # [test case is test8/penco.pl]
24606     if ( $name eq '->' ) {
24607         $im--;
24608         if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
24609             $name = $tokens_to_go[$im] . $name;
24610         }
24611     }
24612
24613     # Finally, remove any leading arrows
24614     if ( substr( $name, 0, 2 ) eq '->' ) {
24615         $name = substr( $name, 2 );
24616     }
24617     return $name;
24618 } ## end sub make_paren_name
24619
24620 {    ## begin closure final_indentation_adjustment
24621
24622     my ( $last_indentation_written, $last_unadjusted_indentation,
24623         $last_leading_token );
24624
24625     sub initialize_final_indentation_adjustment {
24626         $last_indentation_written    = 0;
24627         $last_unadjusted_indentation = 0;
24628         $last_leading_token          = EMPTY_STRING;
24629         return;
24630     }
24631
24632     sub final_indentation_adjustment {
24633
24634         #--------------------------------------------------------------------
24635         # This routine sets the final indentation of a line in the Formatter.
24636         #--------------------------------------------------------------------
24637
24638         # It starts with the basic indentation which has been defined for the
24639         # leading token, and then takes into account any options that the user
24640         # has set regarding special indenting and outdenting.
24641
24642         # This routine has to resolve a number of complex interacting issues,
24643         # including:
24644         # 1. The various -cti=n type flags, which contain the desired change in
24645         #    indentation for lines ending in commas and semicolons, should be
24646         #    followed,
24647         # 2. qw quotes require special processing and do not fit perfectly
24648         #    with normal containers,
24649         # 3. formatting with -wn can complicate things, especially with qw
24650         #    quotes,
24651         # 4. formatting with the -lp option is complicated, and does not
24652         #    work well with qw quotes and with -wn formatting.
24653         # 5. a number of special situations, such as 'cuddled' formatting.
24654         # 6. This routine is mainly concerned with outdenting closing tokens
24655         #    but note that there is some overlap with the functions of sub
24656         #    undo_ci, which was processed earlier, so care has to be taken to
24657         #    keep them coordinated.
24658
24659         my (
24660             $self,       $ibeg,
24661             $iend,       $rfields,
24662             $rpatterns,  $ri_first,
24663             $ri_last,    $rindentation_list,
24664             $level_jump, $starting_in_quote,
24665             $is_static_block_comment,
24666         ) = @_;
24667
24668         my $rLL                      = $self->[_rLL_];
24669         my $Klimit                   = $self->[_Klimit_];
24670         my $ris_bli_container        = $self->[_ris_bli_container_];
24671         my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
24672         my $rwant_reduced_ci         = $self->[_rwant_reduced_ci_];
24673         my $rK_weld_left             = $self->[_rK_weld_left_];
24674
24675         # Find the last code token of this line
24676         my $i_terminal    = $iend;
24677         my $terminal_type = $types_to_go[$iend];
24678         if ( $terminal_type eq '#' && $i_terminal > $ibeg ) {
24679             $i_terminal -= 1;
24680             $terminal_type = $types_to_go[$i_terminal];
24681             if ( $terminal_type eq 'b' && $i_terminal > $ibeg ) {
24682                 $i_terminal -= 1;
24683                 $terminal_type = $types_to_go[$i_terminal];
24684             }
24685         }
24686
24687         my $terminal_block_type = $block_type_to_go[$i_terminal];
24688         my $is_outdented_line   = 0;
24689
24690         my $type_beg            = $types_to_go[$ibeg];
24691         my $token_beg           = $tokens_to_go[$ibeg];
24692         my $block_type_beg      = $block_type_to_go[$ibeg];
24693         my $level_beg           = $levels_to_go[$ibeg];
24694         my $leading_spaces_beg  = $leading_spaces_to_go[$ibeg];
24695         my $K_beg               = $K_to_go[$ibeg];
24696         my $seqno_beg           = $type_sequence_to_go[$ibeg];
24697         my $ibeg_weld_fix       = $ibeg;
24698         my $is_closing_type_beg = $is_closing_type{$type_beg};
24699         my $is_bli_beg = $seqno_beg ? $ris_bli_container->{$seqno_beg} : 0;
24700
24701         # QW INDENTATION PATCH 3:
24702         my $seqno_qw_closing;
24703         if ( $type_beg eq 'q' && $ibeg == 0 ) {
24704             my $KK = $K_to_go[$ibeg];
24705             $seqno_qw_closing =
24706               $self->[_rending_multiline_qw_seqno_by_K_]->{$KK};
24707         }
24708
24709         my $is_semicolon_terminated = $terminal_type eq ';'
24710           && ( $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg]
24711             || $seqno_qw_closing );
24712
24713         # NOTE: A future improvement would be to make it semicolon terminated
24714         # even if it does not have a semicolon but is followed by a closing
24715         # block brace. This would undo ci even for something like the
24716         # following, in which the final paren does not have a semicolon because
24717         # it is a possible weld location:
24718
24719         # if ($BOLD_MATH) {
24720         #     (
24721         #         $labels, $comment,
24722         #         join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
24723         #     )
24724         # }
24725         #
24726
24727         # MOJO: Set a flag if this lines begins with ')->'
24728         my $leading_paren_arrow = (
24729                  $is_closing_type_beg
24730               && $token_beg eq ')'
24731               && (
24732                 ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' )
24733                 || (   $ibeg < $i_terminal - 1
24734                     && $types_to_go[ $ibeg + 1 ] eq 'b'
24735                     && $types_to_go[ $ibeg + 2 ] eq '->' )
24736               )
24737         );
24738
24739         #---------------------------------------------------------
24740         # Section 1: set a flag and a default indentation
24741         #
24742         # Most lines are indented according to the initial token.
24743         # But it is common to outdent to the level just after the
24744         # terminal token in certain cases...
24745         # adjust_indentation flag:
24746         #       0 - do not adjust
24747         #       1 - outdent
24748         #       2 - vertically align with opening token
24749         #       3 - indent
24750         #---------------------------------------------------------
24751         my $adjust_indentation         = 0;
24752         my $default_adjust_indentation = $adjust_indentation;
24753
24754         my (
24755             $opening_indentation, $opening_offset,
24756             $is_leading,          $opening_exists
24757         );
24758
24759         # Honor any flag to reduce -ci set by the -bbxi=n option
24760         if ( $seqno_beg && $rwant_reduced_ci->{$seqno_beg} ) {
24761
24762             # if this is an opening, it must be alone on the line ...
24763             if ( $is_closing_type{$type_beg} || $ibeg == $i_terminal ) {
24764                 $adjust_indentation = 1;
24765             }
24766
24767             # ... or a single welded unit (fix for b1173)
24768             elsif ($total_weld_count) {
24769                 my $Kterm      = $K_to_go[$i_terminal];
24770                 my $Kterm_test = $rK_weld_left->{$Kterm};
24771                 if ( defined($Kterm_test) && $Kterm_test >= $K_beg ) {
24772                     $Kterm = $Kterm_test;
24773                 }
24774                 if ( $Kterm == $K_beg ) { $adjust_indentation = 1 }
24775             }
24776         }
24777
24778         # Update the $is_bli flag as we go. It is initially 1.
24779         # We note seeing a leading opening brace by setting it to 2.
24780         # If we get to the closing brace without seeing the opening then we
24781         # turn it off.  This occurs if the opening brace did not get output
24782         # at the start of a line, so we will then indent the closing brace
24783         # in the default way.
24784         if ( $is_bli_beg && $is_bli_beg == 1 ) {
24785             my $K_opening_container = $self->[_K_opening_container_];
24786             my $K_opening           = $K_opening_container->{$seqno_beg};
24787             if ( $K_beg eq $K_opening ) {
24788                 $ris_bli_container->{$seqno_beg} = $is_bli_beg = 2;
24789             }
24790             else { $is_bli_beg = 0 }
24791         }
24792
24793         # QW PATCH for the combination -lp -wn
24794         # For -lp formatting use $ibeg_weld_fix to get around the problem
24795         # that with -lp type formatting the opening and closing tokens to not
24796         # have sequence numbers.
24797         if ( $seqno_qw_closing && $total_weld_count ) {
24798             my $i_plus = $inext_to_go[$ibeg];
24799             if ( $i_plus <= $max_index_to_go ) {
24800                 my $K_plus = $K_to_go[$i_plus];
24801                 if ( defined( $rK_weld_left->{$K_plus} ) ) {
24802                     $ibeg_weld_fix = $i_plus;
24803                 }
24804             }
24805         }
24806
24807         # if we are at a closing token of some type..
24808         if ( $is_closing_type_beg || $seqno_qw_closing ) {
24809
24810             # get the indentation of the line containing the corresponding
24811             # opening token
24812             (
24813                 $opening_indentation, $opening_offset,
24814                 $is_leading,          $opening_exists
24815               )
24816               = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first,
24817                 $ri_last, $rindentation_list, $seqno_qw_closing );
24818
24819             my $terminal_is_in_list = $self->is_in_list_by_i($i_terminal);
24820
24821             # First set the default behavior:
24822             if (
24823
24824                 # default behavior is to outdent closing lines
24825                 # of the form:   ");  };  ];  )->xxx;"
24826                 $is_semicolon_terminated
24827
24828                 # and 'cuddled parens' of the form:   ")->pack("
24829                 # Bug fix for RT #123749]: the types here were
24830                 # incorrectly '(' and ')'.  Corrected to be '{' and '}'
24831                 || (
24832                        $terminal_type eq '{'
24833                     && $type_beg eq '}'
24834                     && ( $nesting_depth_to_go[$iend] + 1 ==
24835                         $nesting_depth_to_go[$ibeg] )
24836                 )
24837
24838                 # remove continuation indentation for any line like
24839                 #       } ... {
24840                 # or without ending '{' and unbalanced, such as
24841                 #       such as '}->{$operator}'
24842                 || (
24843                     $type_beg eq '}'
24844
24845                     && (   $types_to_go[$iend] eq '{'
24846                         || $levels_to_go[$iend] < $level_beg )
24847                 )
24848
24849                 # and when the next line is at a lower indentation level...
24850
24851                 # PATCH #1: and only if the style allows undoing continuation
24852                 # for all closing token types. We should really wait until
24853                 # the indentation of the next line is known and then make
24854                 # a decision, but that would require another pass.
24855
24856                 # PATCH #2: and not if this token is under -xci control
24857                 || (   $level_jump < 0
24858                     && !$some_closing_token_indentation
24859                     && !$rseqno_controlling_my_ci->{$K_beg} )
24860
24861                 # Patch for -wn=2, multiple welded closing tokens
24862                 || (   $i_terminal > $ibeg
24863                     && $is_closing_type{ $types_to_go[$iend] } )
24864
24865                 # Alternate Patch for git #51, isolated closing qw token not
24866                 # outdented if no-delete-old-newlines is set. This works, but
24867                 # a more general patch elsewhere fixes the real problem: ljump.
24868                 # || ( $seqno_qw_closing && $ibeg == $i_terminal )
24869
24870               )
24871             {
24872                 $adjust_indentation = 1;
24873             }
24874
24875             # outdent something like '),'
24876             if (
24877                 $terminal_type eq ','
24878
24879                 # Removed this constraint for -wn
24880                 # OLD: allow just one character before the comma
24881                 # && $i_terminal == $ibeg + 1
24882
24883                 # require LIST environment; otherwise, we may outdent too much -
24884                 # this can happen in calls without parentheses (overload.t);
24885                 && $terminal_is_in_list
24886               )
24887             {
24888                 $adjust_indentation = 1;
24889             }
24890
24891             # undo continuation indentation of a terminal closing token if
24892             # it is the last token before a level decrease.  This will allow
24893             # a closing token to line up with its opening counterpart, and
24894             # avoids an indentation jump larger than 1 level.
24895             if (   $i_terminal == $ibeg
24896                 && $is_closing_type_beg
24897                 && defined($K_beg)
24898                 && $K_beg < $Klimit )
24899             {
24900                 my $K_plus    = $K_beg + 1;
24901                 my $type_plus = $rLL->[$K_plus]->[_TYPE_];
24902
24903                 if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
24904                     $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
24905                 }
24906
24907                 if ( $type_plus eq '#' && $K_plus < $Klimit ) {
24908                     $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
24909                     if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
24910                         $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
24911                     }
24912
24913                     # Note: we have skipped past just one comment (perhaps a
24914                     # side comment).  There could be more, and we could easily
24915                     # skip past all the rest with the following code, or with a
24916                     # while loop.  It would be rare to have to do this, and
24917                     # those block comments would still be indented, so it would
24918                     # to leave them indented.  So it seems best to just stop at
24919                     # a maximum of one comment.
24920                     ##if ($type_plus eq '#') {
24921                     ##   $K_plus = $self->K_next_code($K_plus);
24922                     ##}
24923                 }
24924
24925                 if ( !$is_bli_beg && defined($K_plus) ) {
24926                     my $lev        = $level_beg;
24927                     my $level_next = $rLL->[$K_plus]->[_LEVEL_];
24928
24929                     # and do not undo ci if it was set by the -xci option
24930                     $adjust_indentation = 1
24931                       if ( $level_next < $lev
24932                         && !$rseqno_controlling_my_ci->{$K_beg} );
24933                 }
24934
24935                 # Patch for RT #96101, in which closing brace of anonymous subs
24936                 # was not outdented.  We should look ahead and see if there is
24937                 # a level decrease at the next token (i.e., a closing token),
24938                 # but right now we do not have that information.  For now
24939                 # we see if we are in a list, and this works well.
24940                 # See test files 'sub*.t' for good test cases.
24941                 if (   $terminal_is_in_list
24942                     && !$rOpts_indent_closing_brace
24943                     && $block_type_beg
24944                     && $block_type_beg =~ /$ASUB_PATTERN/ )
24945                 {
24946                     (
24947                         $opening_indentation, $opening_offset,
24948                         $is_leading,          $opening_exists
24949                       )
24950                       = $self->get_opening_indentation( $ibeg, $ri_first,
24951                         $ri_last, $rindentation_list );
24952                     my $indentation = $leading_spaces_beg;
24953                     if ( defined($opening_indentation)
24954                         && get_spaces($indentation) >
24955                         get_spaces($opening_indentation) )
24956                     {
24957                         $adjust_indentation = 1;
24958                     }
24959                 }
24960             }
24961
24962             # YVES patch 1 of 2:
24963             # Undo ci of line with leading closing eval brace,
24964             # but not beyond the indentation of the line with
24965             # the opening brace.
24966             if (
24967                 $block_type_beg eq 'eval'
24968                 ##&& !$rOpts_line_up_parentheses
24969                 && !ref($leading_spaces_beg)
24970                 && !$rOpts_indent_closing_brace
24971               )
24972             {
24973                 (
24974                     $opening_indentation, $opening_offset,
24975                     $is_leading,          $opening_exists
24976                   )
24977                   = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
24978                     $rindentation_list );
24979                 my $indentation = $leading_spaces_beg;
24980                 if ( defined($opening_indentation)
24981                     && get_spaces($indentation) >
24982                     get_spaces($opening_indentation) )
24983                 {
24984                     $adjust_indentation = 1;
24985                 }
24986             }
24987
24988             # patch for issue git #40: -bli setting has priority
24989             $adjust_indentation = 0 if ($is_bli_beg);
24990
24991             $default_adjust_indentation = $adjust_indentation;
24992
24993             # Now modify default behavior according to user request:
24994             # handle option to indent non-blocks of the form );  };  ];
24995             # But don't do special indentation to something like ')->pack('
24996             if ( !$block_type_beg ) {
24997
24998                 # Note that logical padding has already been applied, so we may
24999                 # need to remove some spaces to get a valid hash key.
25000                 my $tok = $token_beg;
25001                 my $cti = $closing_token_indentation{$tok};
25002
25003                 # Fix the value of 'cti' for an isolated non-welded closing qw
25004                 # delimiter.
25005                 if ( $seqno_qw_closing && $ibeg_weld_fix == $ibeg ) {
25006
25007                     # A quote delimiter which is not a container will not have
25008                     # a cti value defined.  In this case use the style of a
25009                     # paren. For example
25010                     #   my @fars = (
25011                     #      qw<
25012                     #        far
25013                     #        farfar
25014                     #        farfars-far
25015                     #      >,
25016                     #   );
25017                     if ( !defined($cti) && length($tok) == 1 ) {
25018
25019                         # something other than ')', '}', ']' ; use flag for ')'
25020                         $cti = $closing_token_indentation{')'};
25021
25022                         # But for now, do not outdent non-container qw
25023                         # delimiters because it would would change existing
25024                         # formatting.
25025                         if ( $tok ne '>' ) { $cti = 3 }
25026                     }
25027
25028                     # A non-welded closing qw cannot currently use -cti=1
25029                     # because that option requires a sequence number to find
25030                     # the opening indentation, and qw quote delimiters are not
25031                     # sequenced items.
25032                     if ( defined($cti) && $cti == 1 ) { $cti = 0 }
25033                 }
25034
25035                 if ( !defined($cti) ) {
25036
25037                     # $cti may not be defined for several reasons.
25038                     # -padding may have been applied so the character
25039                     #  has a length > 1
25040                     # - we may have welded to a closing quote token.
25041                     #   Here is an example (perltidy -wn):
25042                     #       __PACKAGE__->load_components( qw(
25043                     #  >         Core
25044                     #  >
25045                     #  >     ) );
25046                     $adjust_indentation = 0;
25047
25048                 }
25049                 elsif ( $cti == 1 ) {
25050                     if (   $i_terminal <= $ibeg + 1
25051                         || $is_semicolon_terminated )
25052                     {
25053                         $adjust_indentation = 2;
25054                     }
25055                     else {
25056                         $adjust_indentation = 0;
25057                     }
25058                 }
25059                 elsif ( $cti == 2 ) {
25060                     if ($is_semicolon_terminated) {
25061                         $adjust_indentation = 3;
25062                     }
25063                     else {
25064                         $adjust_indentation = 0;
25065                     }
25066                 }
25067                 elsif ( $cti == 3 ) {
25068                     $adjust_indentation = 3;
25069                 }
25070             }
25071
25072             # handle option to indent blocks
25073             else {
25074                 if (
25075                     $rOpts_indent_closing_brace
25076                     && (
25077                         $i_terminal == $ibeg    #  isolated terminal '}'
25078                         || $is_semicolon_terminated
25079                     )
25080                   )                             #  } xxxx ;
25081                 {
25082                     $adjust_indentation = 3;
25083                 }
25084             }
25085         }
25086
25087         # if at ');', '};', '>;', and '];' of a terminal qw quote
25088         elsif (
25089                substr( $rpatterns->[0], 0, 2 ) eq 'qb'
25090             && substr( $rfields->[0], -1, 1 ) eq ';'
25091             ##&& $rpatterns->[0] =~ /^qb*;$/
25092             && $rfields->[0] =~ /^([\)\}\]\>]);$/
25093           )
25094         {
25095             if ( $closing_token_indentation{$1} == 0 ) {
25096                 $adjust_indentation = 1;
25097             }
25098             else {
25099                 $adjust_indentation = 3;
25100             }
25101         }
25102
25103         # if line begins with a ':', align it with any
25104         # previous line leading with corresponding ?
25105         elsif ( $type_beg eq ':' ) {
25106             (
25107                 $opening_indentation, $opening_offset,
25108                 $is_leading,          $opening_exists
25109               )
25110               = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
25111                 $rindentation_list );
25112             if ($is_leading) { $adjust_indentation = 2; }
25113         }
25114
25115         #---------------------------------------------------------
25116         # Section 2: set indentation according to flag set above
25117         #
25118         # Select the indentation object to define leading
25119         # whitespace.  If we are outdenting something like '} } );'
25120         # then we want to use one level below the last token
25121         # ($i_terminal) in order to get it to fully outdent through
25122         # all levels.
25123         #---------------------------------------------------------
25124         my $indentation;
25125         my $lev;
25126         my $level_end = $levels_to_go[$iend];
25127
25128         if ( $adjust_indentation == 0 ) {
25129             $indentation = $leading_spaces_beg;
25130             $lev         = $level_beg;
25131         }
25132         elsif ( $adjust_indentation == 1 ) {
25133
25134             # Change the indentation to be that of a different token on the line
25135             # Previously, the indentation of the terminal token was used:
25136             # OLD CODING:
25137             # $indentation = $reduced_spaces_to_go[$i_terminal];
25138             # $lev         = $levels_to_go[$i_terminal];
25139
25140             # Generalization for MOJO:
25141             # Use the lowest level indentation of the tokens on the line.
25142             # For example, here we can use the indentation of the ending ';':
25143             #    } until ($selection > 0 and $selection < 10);   # ok to use ';'
25144             # But this will not outdent if we use the terminal indentation:
25145             #    )->then( sub {      # use indentation of the ->, not the {
25146             # Warning: reduced_spaces_to_go[] may be a reference, do not
25147             # do numerical checks with it
25148
25149             my $i_ind = $ibeg;
25150             $indentation = $reduced_spaces_to_go[$i_ind];
25151             $lev         = $levels_to_go[$i_ind];
25152             while ( $i_ind < $i_terminal ) {
25153                 $i_ind++;
25154                 if ( $levels_to_go[$i_ind] < $lev ) {
25155                     $indentation = $reduced_spaces_to_go[$i_ind];
25156                     $lev         = $levels_to_go[$i_ind];
25157                 }
25158             }
25159         }
25160
25161         # handle indented closing token which aligns with opening token
25162         elsif ( $adjust_indentation == 2 ) {
25163
25164             # handle option to align closing token with opening token
25165             $lev = $level_beg;
25166
25167             # calculate spaces needed to align with opening token
25168             my $space_count =
25169               get_spaces($opening_indentation) + $opening_offset;
25170
25171             # Indent less than the previous line.
25172             #
25173             # Problem: For -lp we don't exactly know what it was if there
25174             # were recoverable spaces sent to the aligner.  A good solution
25175             # would be to force a flush of the vertical alignment buffer, so
25176             # that we would know.  For now, this rule is used for -lp:
25177             #
25178             # When the last line did not start with a closing token we will
25179             # be optimistic that the aligner will recover everything wanted.
25180             #
25181             # This rule will prevent us from breaking a hierarchy of closing
25182             # tokens, and in a worst case will leave a closing paren too far
25183             # indented, but this is better than frequently leaving it not
25184             # indented enough.
25185             my $last_spaces = get_spaces($last_indentation_written);
25186
25187             if ( ref($last_indentation_written)
25188                 && !$is_closing_token{$last_leading_token} )
25189             {
25190                 $last_spaces +=
25191                   get_recoverable_spaces($last_indentation_written);
25192             }
25193
25194             # reset the indentation to the new space count if it works
25195             # only options are all or none: nothing in-between looks good
25196             $lev = $level_beg;
25197
25198             my $diff = $last_spaces - $space_count;
25199             if ( $diff > 0 ) {
25200                 $indentation = $space_count;
25201             }
25202             else {
25203
25204                 # We need to fix things ... but there is no good way to do it.
25205                 # The best solution is for the user to use a longer maximum
25206                 # line length.  We could get a smooth variation if we just move
25207                 # the paren in using
25208                 #    $space_count -= ( 1 - $diff );
25209                 # But unfortunately this can give a rather unbalanced look.
25210
25211                 # For -xlp we currently allow a tolerance of one indentation
25212                 # level and then revert to a simpler default.  This will jump
25213                 # suddenly but keeps a balanced look.
25214                 if (   $rOpts_extended_line_up_parentheses
25215                     && $diff >= -$rOpts_indent_columns
25216                     && $space_count > $leading_spaces_beg )
25217                 {
25218                     $indentation = $space_count;
25219                 }
25220
25221                 # Otherwise revert to defaults
25222                 elsif ( $default_adjust_indentation == 0 ) {
25223                     $indentation = $leading_spaces_beg;
25224                 }
25225                 elsif ( $default_adjust_indentation == 1 ) {
25226                     $indentation = $reduced_spaces_to_go[$i_terminal];
25227                     $lev         = $levels_to_go[$i_terminal];
25228                 }
25229             }
25230         }
25231
25232         # Full indentation of closing tokens (-icb and -icp or -cti=2)
25233         else {
25234
25235             # handle -icb (indented closing code block braces)
25236             # Updated method for indented block braces: indent one full level if
25237             # there is no continuation indentation.  This will occur for major
25238             # structures such as sub, if, else, but not for things like map
25239             # blocks.
25240             #
25241             # Note: only code blocks without continuation indentation are
25242             # handled here (if, else, unless, ..). In the following snippet,
25243             # the terminal brace of the sort block will have continuation
25244             # indentation as shown so it will not be handled by the coding
25245             # here.  We would have to undo the continuation indentation to do
25246             # this, but it probably looks ok as is.  This is a possible future
25247             # update for semicolon terminated lines.
25248             #
25249             #     if ($sortby eq 'date' or $sortby eq 'size') {
25250             #         @files = sort {
25251             #             $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
25252             #                 or $a cmp $b
25253             #                 } @files;
25254             #         }
25255             #
25256             if (   $block_type_beg
25257                 && $ci_levels_to_go[$i_terminal] == 0 )
25258             {
25259                 my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
25260                 $indentation = $spaces + $rOpts_indent_columns;
25261
25262                 # NOTE: for -lp we could create a new indentation object, but
25263                 # there is probably no need to do it
25264             }
25265
25266             # handle -icp and any -icb block braces which fall through above
25267             # test such as the 'sort' block mentioned above.
25268             else {
25269
25270                 # There are currently two ways to handle -icp...
25271                 # One way is to use the indentation of the previous line:
25272                 # $indentation = $last_indentation_written;
25273
25274                 # The other way is to use the indentation that the previous line
25275                 # would have had if it hadn't been adjusted:
25276                 $indentation = $last_unadjusted_indentation;
25277
25278                 # Current method: use the minimum of the two. This avoids
25279                 # inconsistent indentation.
25280                 if ( get_spaces($last_indentation_written) <
25281                     get_spaces($indentation) )
25282                 {
25283                     $indentation = $last_indentation_written;
25284                 }
25285             }
25286
25287             # use previous indentation but use own level
25288             # to cause list to be flushed properly
25289             $lev = $level_beg;
25290         }
25291
25292         # remember indentation except for multi-line quotes, which get
25293         # no indentation
25294         unless ( $ibeg == 0 && $starting_in_quote ) {
25295             $last_indentation_written    = $indentation;
25296             $last_unadjusted_indentation = $leading_spaces_beg;
25297             $last_leading_token          = $token_beg;
25298
25299             # Patch to make a line which is the end of a qw quote work with the
25300             # -lp option.  Make $token_beg look like a closing token as some
25301             # type even if it is not.  This variable will become
25302             # $last_leading_token at the end of this loop.  Then, if the -lp
25303             # style is selected, and the next line is also a
25304             # closing token, it will not get more indentation than this line.
25305             # We need to do this because qw quotes (at present) only get
25306             # continuation indentation, not one level of indentation, so we
25307             # need to turn off the -lp indentation.
25308
25309             # ... a picture is worth a thousand words:
25310
25311             # perltidy -wn -gnu (Without this patch):
25312             #   ok(defined(
25313             #       $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
25314             #       2981014)])
25315             #             ));
25316
25317             # perltidy -wn -gnu (With this patch):
25318             #  ok(defined(
25319             #      $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
25320             #      2981014)])
25321             #  ));
25322             if ( $seqno_qw_closing
25323                 && ( length($token_beg) > 1 || $token_beg eq '>' ) )
25324             {
25325                 $last_leading_token = ')';
25326             }
25327         }
25328
25329         # be sure lines with leading closing tokens are not outdented more
25330         # than the line which contained the corresponding opening token.
25331
25332         #--------------------------------------------------------
25333         # updated per bug report in alex_bug.pl: we must not
25334         # mess with the indentation of closing logical braces so
25335         # we must treat something like '} else {' as if it were
25336         # an isolated brace
25337         #--------------------------------------------------------
25338         my $is_isolated_block_brace = $block_type_beg
25339           && ( $i_terminal == $ibeg
25340             || $is_if_elsif_else_unless_while_until_for_foreach{$block_type_beg}
25341           );
25342
25343         # only do this for a ':; which is aligned with its leading '?'
25344         my $is_unaligned_colon = $type_beg eq ':' && !$is_leading;
25345
25346         if (
25347             defined($opening_indentation)
25348             && !$leading_paren_arrow    # MOJO
25349             && !$is_isolated_block_brace
25350             && !$is_unaligned_colon
25351           )
25352         {
25353             if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
25354                 $indentation = $opening_indentation;
25355             }
25356         }
25357
25358         # remember the indentation of each line of this batch
25359         push @{$rindentation_list}, $indentation;
25360
25361         # outdent lines with certain leading tokens...
25362         if (
25363
25364             # must be first word of this batch
25365             $ibeg == 0
25366
25367             # and ...
25368             && (
25369
25370                 # certain leading keywords if requested
25371                 $rOpts_outdent_keywords
25372                 && $type_beg eq 'k'
25373                 && $outdent_keyword{$token_beg}
25374
25375                 # or labels if requested
25376                 || $rOpts_outdent_labels && $type_beg eq 'J'
25377
25378                 # or static block comments if requested
25379                 || $is_static_block_comment
25380                 && $rOpts_outdent_static_block_comments
25381             )
25382           )
25383         {
25384             my $space_count = leading_spaces_to_go($ibeg);
25385             if ( $space_count > 0 ) {
25386                 $space_count -= $rOpts_continuation_indentation;
25387                 $is_outdented_line = 1;
25388                 if ( $space_count < 0 ) { $space_count = 0 }
25389
25390                 # do not promote a spaced static block comment to non-spaced;
25391                 # this is not normally necessary but could be for some
25392                 # unusual user inputs (such as -ci = -i)
25393                 if ( $type_beg eq '#' && $space_count == 0 ) {
25394                     $space_count = 1;
25395                 }
25396
25397                 $indentation = $space_count;
25398             }
25399         }
25400
25401         return ( $indentation, $lev, $level_end, $terminal_type,
25402             $terminal_block_type, $is_semicolon_terminated,
25403             $is_outdented_line );
25404     } ## end sub final_indentation_adjustment
25405 } ## end closure final_indentation_adjustment
25406
25407 sub get_opening_indentation {
25408
25409     # get the indentation of the line which output the opening token
25410     # corresponding to a given closing token in the current output batch.
25411     #
25412     # given:
25413     # $i_closing - index in this line of a closing token ')' '}' or ']'
25414     #
25415     # $ri_first - reference to list of the first index $i for each output
25416     #               line in this batch
25417     # $ri_last - reference to list of the last index $i for each output line
25418     #              in this batch
25419     # $rindentation_list - reference to a list containing the indentation
25420     #            used for each line.
25421     # $qw_seqno - optional sequence number to use if normal seqno not defined
25422     #           (TODO: would be more general to just look this up from index i)
25423     #
25424     # return:
25425     #   -the indentation of the line which contained the opening token
25426     #    which matches the token at index $i_opening
25427     #   -and its offset (number of columns) from the start of the line
25428     #
25429     my ( $self, $i_closing, $ri_first, $ri_last, $rindentation_list, $qw_seqno )
25430       = @_;
25431
25432     # first, see if the opening token is in the current batch
25433     my $i_opening = $mate_index_to_go[$i_closing];
25434     my ( $indent, $offset, $is_leading, $exists );
25435     $exists = 1;
25436     if ( defined($i_opening) && $i_opening >= 0 ) {
25437
25438         # it is..look up the indentation
25439         ( $indent, $offset, $is_leading ) =
25440           lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
25441             $rindentation_list );
25442     }
25443
25444     # if not, it should have been stored in the hash by a previous batch
25445     else {
25446         my $seqno = $type_sequence_to_go[$i_closing];
25447         $seqno = $qw_seqno unless ($seqno);
25448         ( $indent, $offset, $is_leading, $exists ) =
25449           get_saved_opening_indentation($seqno);
25450     }
25451     return ( $indent, $offset, $is_leading, $exists );
25452 } ## end sub get_opening_indentation
25453
25454 sub set_vertical_tightness_flags {
25455
25456     my ( $self, $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last,
25457         $ending_in_quote, $closing_side_comment )
25458       = @_;
25459
25460     # Define vertical tightness controls for the nth line of a batch.
25461
25462     # These parameters are passed to the vertical aligner to indicated
25463     # if we should combine this line with the next line to achieve the
25464     # desired vertical tightness.  This was previously an array but
25465     # has been converted to a hash:
25466
25467     # old   hash              Meaning
25468     # index key
25469     #
25470     # 0   _vt_type:           1=opening non-block    2=closing non-block
25471     #                         3=opening block brace  4=closing block brace
25472     #
25473     # 1a  _vt_opening_flag:   1=no multiple steps, 2=multiple steps ok
25474     # 1b  _vt_closing_flag:   spaces of padding to use if closing
25475     # 2   _vt_seqno:          sequence number of container
25476     # 3   _vt_valid flag:     do not append if this flag is false. Will be
25477     #           true if appropriate -vt flag is set.  Otherwise, Will be
25478     #           made true only for 2 line container in parens with -lp
25479     # 4   _vt_seqno_beg:      sequence number of first token of line
25480     # 5   _vt_seqno_end:      sequence number of last token of line
25481     # 6   _vt_min_lines:      min number of lines for joining opening cache,
25482     #                           0=no constraint
25483     # 7   _vt_max_lines:      max number of lines for joining opening cache,
25484     #                           0=no constraint
25485
25486     # The vertical tightness mechanism can add whitespace, so whitespace can
25487     # continually increase if we allowed it when the -fws flag is set.
25488     # See case b499 for an example.
25489
25490     # Speedup: just return for a comment
25491     if ( $max_index_to_go == 0 && $types_to_go[0] eq '#' ) {
25492         return;
25493     }
25494
25495     # Define these values...
25496     my $vt_type         = 0;
25497     my $vt_opening_flag = 0;
25498     my $vt_closing_flag = 0;
25499     my $vt_seqno        = 0;
25500     my $vt_valid_flag   = 0;
25501     my $vt_seqno_beg    = 0;
25502     my $vt_seqno_end    = 0;
25503     my $vt_min_lines    = 0;
25504     my $vt_max_lines    = 0;
25505
25506     goto RETURN
25507       if ($rOpts_freeze_whitespace);
25508
25509     # Uses these global parameters:
25510     #   $rOpts_block_brace_tightness
25511     #   $rOpts_block_brace_vertical_tightness
25512     #   $rOpts_stack_closing_block_brace
25513     #   %opening_vertical_tightness
25514     #   %closing_vertical_tightness
25515     #   %opening_token_right
25516     #   %stack_closing_token
25517     #   %stack_opening_token
25518
25519     #--------------------------------------------------------------
25520     # Vertical Tightness Flags Section 1:
25521     # Handle Lines 1 .. n-1 but not the last line
25522     # For non-BLOCK tokens, we will need to examine the next line
25523     # too, so we won't consider the last line.
25524     #--------------------------------------------------------------
25525     if ( $n < $n_last_line ) {
25526
25527         #--------------------------------------------------------------
25528         # Vertical Tightness Flags Section 1a:
25529         # Look for Type 1, last token of this line is a non-block opening token
25530         #--------------------------------------------------------------
25531         my $ibeg_next = $ri_first->[ $n + 1 ];
25532         my $token_end = $tokens_to_go[$iend];
25533         my $iend_next = $ri_last->[ $n + 1 ];
25534
25535         if (
25536                $type_sequence_to_go[$iend]
25537             && !$block_type_to_go[$iend]
25538             && $is_opening_token{$token_end}
25539             && (
25540                 $opening_vertical_tightness{$token_end} > 0
25541
25542                 # allow 2-line method call to be closed up
25543                 || (   $rOpts_line_up_parentheses
25544                     && $token_end eq '('
25545                     && $self->[_rlp_object_by_seqno_]
25546                     ->{ $type_sequence_to_go[$iend] }
25547                     && $iend > $ibeg
25548                     && $types_to_go[ $iend - 1 ] ne 'b' )
25549             )
25550           )
25551         {
25552             # avoid multiple jumps in nesting depth in one line if
25553             # requested
25554             my $ovt = $opening_vertical_tightness{$token_end};
25555
25556             # Turn off the -vt flag if the next line ends in a weld.
25557             # This avoids an instability with one-line welds (fixes b1183).
25558             my $type_end_next = $types_to_go[$iend_next];
25559             $ovt = 0
25560               if ( $self->[_rK_weld_left_]->{ $K_to_go[$iend_next] }
25561                 && $is_closing_type{$type_end_next} );
25562
25563             # Avoid conflict of -bom and -pt=1 or -pt=2, fixes b1270
25564             # See similar patch above for $cvt.
25565             my $seqno = $type_sequence_to_go[$iend];
25566             if ( $ovt && $self->[_rwant_container_open_]->{$seqno} ) {
25567                 $ovt = 0;
25568             }
25569
25570             if (   $ovt == 2
25571                 && $self->[_rreduce_vertical_tightness_by_seqno_]->{$seqno} )
25572             {
25573                 $ovt = 1;
25574             }
25575
25576             unless (
25577                 $ovt < 2
25578                 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
25579                     $nesting_depth_to_go[$ibeg_next] )
25580               )
25581             {
25582
25583                 # If -vt flag has not been set, mark this as invalid
25584                 # and aligner will validate it if it sees the closing paren
25585                 # within 2 lines.
25586                 my $valid_flag = $ovt;
25587
25588                 $vt_type         = 1;
25589                 $vt_opening_flag = $ovt;
25590                 $vt_seqno        = $type_sequence_to_go[$iend];
25591                 $vt_valid_flag   = $valid_flag;
25592             }
25593         }
25594
25595         #--------------------------------------------------------------
25596         # Vertical Tightness Flags Section 1b:
25597         # Look for Type 2, first token of next line is a non-block closing
25598         # token .. and be sure this line does not have a side comment
25599         #--------------------------------------------------------------
25600         my $token_next = $tokens_to_go[$ibeg_next];
25601         if (   $type_sequence_to_go[$ibeg_next]
25602             && !$block_type_to_go[$ibeg_next]
25603             && $is_closing_token{$token_next}
25604             && $types_to_go[$iend] ne '#' )    # for safety, shouldn't happen!
25605         {
25606             my $ovt = $opening_vertical_tightness{$token_next};
25607             my $cvt = $closing_vertical_tightness{$token_next};
25608
25609             # Avoid conflict of -bom and -pvt=1 or -pvt=2, fixes b977, b1303
25610             # See similar patch above for $ovt.
25611             my $seqno = $type_sequence_to_go[$ibeg_next];
25612             if ( $cvt && $self->[_rwant_container_open_]->{$seqno} ) {
25613                 $cvt = 0;
25614             }
25615
25616             # Implement cvt=3: like cvt=0 for assigned structures, like cvt=1
25617             # otherwise.  Added for rt136417.
25618             if ( $cvt == 3 ) {
25619                 $cvt = $self->[_ris_assigned_structure_]->{$seqno} ? 0 : 1;
25620             }
25621
25622             # The unusual combination -pvtc=2 -dws -naws can be unstable.
25623             # This fixes b1282, b1283.  This can be moved to set_options.
25624             if (   $cvt == 2
25625                 && $rOpts_delete_old_whitespace
25626                 && !$rOpts_add_whitespace )
25627             {
25628                 $cvt = 1;
25629             }
25630
25631             if (
25632
25633                 # Never append a trailing line like   ')->pack(' because it
25634                 # will throw off later alignment.  So this line must start at a
25635                 # deeper level than the next line (fix1 for welding, git #45).
25636                 (
25637                     $nesting_depth_to_go[$ibeg_next] >=
25638                     $nesting_depth_to_go[ $iend_next + 1 ] + 1
25639                 )
25640                 && (
25641                     $cvt == 2
25642                     || (
25643                         !$self->is_in_list_by_i($ibeg_next)
25644                         && (
25645                             $cvt == 1
25646
25647                             # allow closing up 2-line method calls
25648                             || (   $rOpts_line_up_parentheses
25649                                 && $token_next eq ')'
25650                                 && $self->[_rlp_object_by_seqno_]
25651                                 ->{ $type_sequence_to_go[$ibeg_next] } )
25652                         )
25653                     )
25654                 )
25655               )
25656             {
25657
25658                 # decide which trailing closing tokens to append..
25659                 my $ok = 0;
25660                 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
25661                 else {
25662                     my $str = join( EMPTY_STRING,
25663                         @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
25664
25665                     # append closing token if followed by comment or ';'
25666                     # or another closing token (fix2 for welding, git #45)
25667                     if ( $str =~ /^b?[\)\]\}R#;]/ ) { $ok = 1 }
25668                 }
25669
25670                 if ($ok) {
25671                     my $valid_flag = $cvt;
25672                     my $min_lines  = 0;
25673                     my $max_lines  = 0;
25674
25675                     # Fix for b1187 and b1188: Blinking can occur if we allow
25676                     # welded tokens to re-form into one-line blocks during
25677                     # vertical alignment when -lp used.  So for this case we
25678                     # set the minimum number of lines to be 1 instead of 0.
25679                     # The maximum should be 1 if -vtc is not used.  If -vtc is
25680                     # used, we turn the valid
25681                     # flag off and set the maximum to 0. This is equivalent to
25682                     # using a large number.
25683                     my $seqno_ibeg_next = $type_sequence_to_go[$ibeg_next];
25684                     if (   $rOpts_line_up_parentheses
25685                         && $total_weld_count
25686                         && $self->[_rlp_object_by_seqno_]->{$seqno_ibeg_next}
25687                         && $self->is_welded_at_seqno($seqno_ibeg_next) )
25688                     {
25689                         $min_lines  = 1;
25690                         $max_lines  = $cvt ? 0 : 1;
25691                         $valid_flag = 0;
25692                     }
25693
25694                     $vt_type         = 2;
25695                     $vt_closing_flag = $tightness{$token_next} == 2 ? 0 : 1;
25696                     $vt_seqno        = $type_sequence_to_go[$ibeg_next];
25697                     $vt_valid_flag   = $valid_flag;
25698                     $vt_min_lines    = $min_lines;
25699                     $vt_max_lines    = $max_lines;
25700                 }
25701             }
25702         }
25703
25704         #--------------------------------------------------------------
25705         # Vertical Tightness Flags Section 1c:
25706         # Implement the Opening Token Right flag (Type 2)..
25707         # If requested, move an isolated trailing opening token to the end of
25708         # the previous line which ended in a comma.  We could do this
25709         # in sub recombine_breakpoints but that would cause problems
25710         # with -lp formatting.  The problem is that indentation will
25711         # quickly move far to the right in nested expressions.  By
25712         # doing it after indentation has been set, we avoid changes
25713         # to the indentation.  Actual movement of the token takes place
25714         # in sub valign_output_step_B.
25715
25716         # Note added 4 May 2021: the man page suggests that the -otr flags
25717         # are mainly for opening tokens following commas.  But this seems
25718         # to have been generalized long ago to include other situations.
25719         # I checked the coding back to 2012 and it is essentially the same
25720         # as here, so it is best to leave this unchanged for now.
25721         #--------------------------------------------------------------
25722         if (
25723             $opening_token_right{ $tokens_to_go[$ibeg_next] }
25724
25725             # previous line is not opening
25726             # (use -sot to combine with it)
25727             && !$is_opening_token{$token_end}
25728
25729             # previous line ended in one of these
25730             # (add other cases if necessary; '=>' and '.' are not necessary
25731             && !$block_type_to_go[$ibeg_next]
25732
25733             # this is a line with just an opening token
25734             && (   $iend_next == $ibeg_next
25735                 || $iend_next == $ibeg_next + 2
25736                 && $types_to_go[$iend_next] eq '#' )
25737
25738             # Fix for case b1060 when both -baoo and -otr are set:
25739             # to avoid blinking, honor the -baoo flag over the -otr flag.
25740             && $token_end ne '||' && $token_end ne '&&'
25741
25742             # Keep break after '=' if -lp. Fixes b964 b1040 b1062 b1083 b1089.
25743             && !(
25744                    $token_end eq '='
25745                 && $rOpts_line_up_parentheses
25746                 && $self->[_rlp_object_by_seqno_]
25747                 ->{ $type_sequence_to_go[$ibeg_next] }
25748             )
25749
25750             # looks bad if we align vertically with the wrong container
25751             && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
25752           )
25753         {
25754             my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
25755
25756             $vt_type         = 2;
25757             $vt_closing_flag = $spaces;
25758             $vt_seqno        = $type_sequence_to_go[$ibeg_next];
25759             $vt_valid_flag   = 1;
25760         }
25761
25762         #--------------------------------------------------------------
25763         # Vertical Tightness Flags Section 1d:
25764         # Stacking of opening and closing tokens (Type 2)
25765         #--------------------------------------------------------------
25766         my $stackable;
25767         my $token_beg_next = $tokens_to_go[$ibeg_next];
25768
25769         # patch to make something like 'qw(' behave like an opening paren
25770         # (aran.t)
25771         if ( $types_to_go[$ibeg_next] eq 'q' ) {
25772             if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
25773                 $token_beg_next = $1;
25774             }
25775         }
25776
25777         if (   $is_closing_token{$token_end}
25778             && $is_closing_token{$token_beg_next} )
25779         {
25780
25781             # avoid instability of combo -bom and -sct; b1179
25782             my $seq_next = $type_sequence_to_go[$ibeg_next];
25783             $stackable = $stack_closing_token{$token_beg_next}
25784               unless ( $block_type_to_go[$ibeg_next]
25785                 || $seq_next && $self->[_rwant_container_open_]->{$seq_next} );
25786         }
25787         elsif ($is_opening_token{$token_end}
25788             && $is_opening_token{$token_beg_next} )
25789         {
25790             $stackable = $stack_opening_token{$token_beg_next}
25791               unless ( $block_type_to_go[$ibeg_next] )
25792               ;    # shouldn't happen; just checking
25793         }
25794
25795         if ($stackable) {
25796
25797             my $is_semicolon_terminated;
25798             if ( $n + 1 == $n_last_line ) {
25799                 my ( $terminal_type, $i_terminal ) =
25800                   terminal_type_i( $ibeg_next, $iend_next );
25801                 $is_semicolon_terminated = $terminal_type eq ';'
25802                   && $nesting_depth_to_go[$iend_next] <
25803                   $nesting_depth_to_go[$ibeg_next];
25804             }
25805
25806             # this must be a line with just an opening token
25807             # or end in a semicolon
25808             if (
25809                 $is_semicolon_terminated
25810                 || (   $iend_next == $ibeg_next
25811                     || $iend_next == $ibeg_next + 2
25812                     && $types_to_go[$iend_next] eq '#' )
25813               )
25814             {
25815                 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
25816
25817                 $vt_type         = 2;
25818                 $vt_closing_flag = $spaces;
25819                 $vt_seqno        = $type_sequence_to_go[$ibeg_next];
25820                 $vt_valid_flag   = 1;
25821
25822             }
25823         }
25824     }
25825
25826     #--------------------------------------------------------------
25827     # Vertical Tightness Flags Section 2:
25828     # Handle type 3, opening block braces on last line of the batch
25829     # Check for a last line with isolated opening BLOCK curly
25830     #--------------------------------------------------------------
25831     elsif ($rOpts_block_brace_vertical_tightness
25832         && $ibeg eq $iend
25833         && $types_to_go[$iend] eq '{'
25834         && $block_type_to_go[$iend] =~
25835         /$block_brace_vertical_tightness_pattern/ )
25836     {
25837         $vt_type         = 3;
25838         $vt_opening_flag = $rOpts_block_brace_vertical_tightness;
25839         $vt_seqno        = 0;
25840         $vt_valid_flag   = 1;
25841     }
25842
25843     #--------------------------------------------------------------
25844     # Vertical Tightness Flags Section 3:
25845     # Handle type 4, a closing block brace on the last line of the batch Check
25846     # for a last line with isolated closing BLOCK curly
25847     # Patch: added a check for any new closing side comment which the
25848     # -csc option may generate. If it exists, there will be a side comment
25849     # so we cannot combine with a brace on the next line.  This issue
25850     # occurs for the combination -scbb and -csc is used.
25851     #--------------------------------------------------------------
25852     elsif ($rOpts_stack_closing_block_brace
25853         && $ibeg eq $iend
25854         && $block_type_to_go[$iend]
25855         && $types_to_go[$iend] eq '}'
25856         && ( !$closing_side_comment || $n < $n_last_line ) )
25857     {
25858         my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
25859
25860         $vt_type         = 4;
25861         $vt_closing_flag = $spaces;
25862         $vt_seqno        = $type_sequence_to_go[$iend];
25863         $vt_valid_flag   = 1;
25864
25865     }
25866
25867     # get the sequence numbers of the ends of this line
25868     $vt_seqno_beg = $type_sequence_to_go[$ibeg];
25869     if ( !$vt_seqno_beg && $types_to_go[$ibeg] eq 'q' ) {
25870         $vt_seqno_beg = $self->get_seqno( $ibeg, $ending_in_quote );
25871     }
25872
25873     $vt_seqno_end = $type_sequence_to_go[$iend];
25874     if ( !$vt_seqno_end && $types_to_go[$iend] eq 'q' ) {
25875         $vt_seqno_end = $self->get_seqno( $iend, $ending_in_quote );
25876     }
25877
25878   RETURN:
25879
25880     my $rvertical_tightness_flags = {
25881         _vt_type         => $vt_type,
25882         _vt_opening_flag => $vt_opening_flag,
25883         _vt_closing_flag => $vt_closing_flag,
25884         _vt_seqno        => $vt_seqno,
25885         _vt_valid_flag   => $vt_valid_flag,
25886         _vt_seqno_beg    => $vt_seqno_beg,
25887         _vt_seqno_end    => $vt_seqno_end,
25888         _vt_min_lines    => $vt_min_lines,
25889         _vt_max_lines    => $vt_max_lines,
25890     };
25891
25892     return ($rvertical_tightness_flags);
25893 } ## end sub set_vertical_tightness_flags
25894
25895 ##########################################################
25896 # CODE SECTION 14: Code for creating closing side comments
25897 ##########################################################
25898
25899 {    ## begin closure accumulate_csc_text
25900
25901 # These routines are called once per batch when the --closing-side-comments flag
25902 # has been set.
25903
25904     my %block_leading_text;
25905     my %block_opening_line_number;
25906     my $csc_new_statement_ok;
25907     my $csc_last_label;
25908     my %csc_block_label;
25909     my $accumulating_text_for_block;
25910     my $leading_block_text;
25911     my $rleading_block_if_elsif_text;
25912     my $leading_block_text_level;
25913     my $leading_block_text_length_exceeded;
25914     my $leading_block_text_line_length;
25915     my $leading_block_text_line_number;
25916
25917     sub initialize_csc_vars {
25918         %block_leading_text           = ();
25919         %block_opening_line_number    = ();
25920         $csc_new_statement_ok         = 1;
25921         $csc_last_label               = EMPTY_STRING;
25922         %csc_block_label              = ();
25923         $rleading_block_if_elsif_text = [];
25924         $accumulating_text_for_block  = EMPTY_STRING;
25925         reset_block_text_accumulator();
25926         return;
25927     } ## end sub initialize_csc_vars
25928
25929     sub reset_block_text_accumulator {
25930
25931         # save text after 'if' and 'elsif' to append after 'else'
25932         if ($accumulating_text_for_block) {
25933
25934             ## ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
25935             if ( $is_if_elsif{$accumulating_text_for_block} ) {
25936                 push @{$rleading_block_if_elsif_text}, $leading_block_text;
25937             }
25938         }
25939         $accumulating_text_for_block        = EMPTY_STRING;
25940         $leading_block_text                 = EMPTY_STRING;
25941         $leading_block_text_level           = 0;
25942         $leading_block_text_length_exceeded = 0;
25943         $leading_block_text_line_number     = 0;
25944         $leading_block_text_line_length     = 0;
25945         return;
25946     } ## end sub reset_block_text_accumulator
25947
25948     sub set_block_text_accumulator {
25949         my ( $self, $i ) = @_;
25950         $accumulating_text_for_block = $tokens_to_go[$i];
25951         if ( $accumulating_text_for_block !~ /^els/ ) {
25952             $rleading_block_if_elsif_text = [];
25953         }
25954         $leading_block_text                 = EMPTY_STRING;
25955         $leading_block_text_level           = $levels_to_go[$i];
25956         $leading_block_text_line_number     = $self->get_output_line_number();
25957         $leading_block_text_length_exceeded = 0;
25958
25959         # this will contain the column number of the last character
25960         # of the closing side comment
25961         $leading_block_text_line_length =
25962           length($csc_last_label) +
25963           length($accumulating_text_for_block) +
25964           length( $rOpts->{'closing-side-comment-prefix'} ) +
25965           $leading_block_text_level * $rOpts_indent_columns + 3;
25966         return;
25967     } ## end sub set_block_text_accumulator
25968
25969     sub accumulate_block_text {
25970         my ( $self, $i ) = @_;
25971
25972         # accumulate leading text for -csc, ignoring any side comments
25973         if (   $accumulating_text_for_block
25974             && !$leading_block_text_length_exceeded
25975             && $types_to_go[$i] ne '#' )
25976         {
25977
25978             my $added_length = $token_lengths_to_go[$i];
25979             $added_length += 1 if $i == 0;
25980             my $new_line_length =
25981               $leading_block_text_line_length + $added_length;
25982
25983             # we can add this text if we don't exceed some limits..
25984             if (
25985
25986                 # we must not have already exceeded the text length limit
25987                 length($leading_block_text) <
25988                 $rOpts_closing_side_comment_maximum_text
25989
25990                 # and either:
25991                 # the new total line length must be below the line length limit
25992                 # or the new length must be below the text length limit
25993                 # (ie, we may allow one token to exceed the text length limit)
25994                 && (
25995                     $new_line_length <
25996                     $maximum_line_length_at_level[$leading_block_text_level]
25997
25998                     || length($leading_block_text) + $added_length <
25999                     $rOpts_closing_side_comment_maximum_text
26000                 )
26001
26002                # UNLESS: we are adding a closing paren before the brace we seek.
26003                # This is an attempt to avoid situations where the ... to be
26004                # added are longer than the omitted right paren, as in:
26005
26006              #   foreach my $item (@a_rather_long_variable_name_here) {
26007              #      &whatever;
26008              #   } ## end foreach my $item (@a_rather_long_variable_name_here...
26009
26010                 || (
26011                     $tokens_to_go[$i] eq ')'
26012                     && (
26013                         (
26014                                $i + 1 <= $max_index_to_go
26015                             && $block_type_to_go[ $i + 1 ] eq
26016                             $accumulating_text_for_block
26017                         )
26018                         || (   $i + 2 <= $max_index_to_go
26019                             && $block_type_to_go[ $i + 2 ] eq
26020                             $accumulating_text_for_block )
26021                     )
26022                 )
26023               )
26024             {
26025
26026                 # add an extra space at each newline
26027                 if ( $i == 0 && $types_to_go[$i] ne 'b' ) {
26028                     $leading_block_text .= SPACE;
26029                 }
26030
26031                 # add the token text
26032                 $leading_block_text .= $tokens_to_go[$i];
26033                 $leading_block_text_line_length = $new_line_length;
26034             }
26035
26036             # show that text was truncated if necessary
26037             elsif ( $types_to_go[$i] ne 'b' ) {
26038                 $leading_block_text_length_exceeded = 1;
26039                 $leading_block_text .= '...';
26040             }
26041         }
26042         return;
26043     } ## end sub accumulate_block_text
26044
26045     sub accumulate_csc_text {
26046
26047         my ($self) = @_;
26048
26049         # called once per output buffer when -csc is used. Accumulates
26050         # the text placed after certain closing block braces.
26051         # Defines and returns the following for this buffer:
26052
26053         my $block_leading_text =
26054           EMPTY_STRING;    # the leading text of the last '}'
26055         my $rblock_leading_if_elsif_text;
26056         my $i_block_leading_text =
26057           -1;              # index of token owning block_leading_text
26058         my $block_line_count    = 100;          # how many lines the block spans
26059         my $terminal_type       = 'b';          # type of last nonblank token
26060         my $i_terminal          = 0;            # index of last nonblank token
26061         my $terminal_block_type = EMPTY_STRING;
26062
26063         # update most recent statement label
26064         $csc_last_label = EMPTY_STRING unless ($csc_last_label);
26065         if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
26066         my $block_label = $csc_last_label;
26067
26068         # Loop over all tokens of this batch
26069         for my $i ( 0 .. $max_index_to_go ) {
26070             my $type       = $types_to_go[$i];
26071             my $block_type = $block_type_to_go[$i];
26072             my $token      = $tokens_to_go[$i];
26073
26074             # remember last nonblank token type
26075             if ( $type ne '#' && $type ne 'b' ) {
26076                 $terminal_type       = $type;
26077                 $terminal_block_type = $block_type;
26078                 $i_terminal          = $i;
26079             }
26080
26081             my $type_sequence = $type_sequence_to_go[$i];
26082             if ( $block_type && $type_sequence ) {
26083
26084                 if ( $token eq '}' ) {
26085
26086                     # restore any leading text saved when we entered this block
26087                     if ( defined( $block_leading_text{$type_sequence} ) ) {
26088                         ( $block_leading_text, $rblock_leading_if_elsif_text )
26089                           = @{ $block_leading_text{$type_sequence} };
26090                         $i_block_leading_text = $i;
26091                         delete $block_leading_text{$type_sequence};
26092                         $rleading_block_if_elsif_text =
26093                           $rblock_leading_if_elsif_text;
26094                     }
26095
26096                     if ( defined( $csc_block_label{$type_sequence} ) ) {
26097                         $block_label = $csc_block_label{$type_sequence};
26098                         delete $csc_block_label{$type_sequence};
26099                     }
26100
26101                     # if we run into a '}' then we probably started accumulating
26102                     # at something like a trailing 'if' clause..no harm done.
26103                     if (   $accumulating_text_for_block
26104                         && $levels_to_go[$i] <= $leading_block_text_level )
26105                     {
26106                         my $lev = $levels_to_go[$i];
26107                         reset_block_text_accumulator();
26108                     }
26109
26110                     if ( defined( $block_opening_line_number{$type_sequence} ) )
26111                     {
26112                         my $output_line_number =
26113                           $self->get_output_line_number();
26114                         $block_line_count =
26115                           $output_line_number -
26116                           $block_opening_line_number{$type_sequence} + 1;
26117                         delete $block_opening_line_number{$type_sequence};
26118                     }
26119                     else {
26120
26121                         # Error: block opening line undefined for this line..
26122                         # This shouldn't be possible, but it is not a
26123                         # significant problem.
26124                     }
26125                 }
26126
26127                 elsif ( $token eq '{' ) {
26128
26129                     my $line_number = $self->get_output_line_number();
26130                     $block_opening_line_number{$type_sequence} = $line_number;
26131
26132                     # set a label for this block, except for
26133                     # a bare block which already has the label
26134                     # A label can only be used on the next {
26135                     if ( $block_type =~ /:$/ ) {
26136                         $csc_last_label = EMPTY_STRING;
26137                     }
26138                     $csc_block_label{$type_sequence} = $csc_last_label;
26139                     $csc_last_label = EMPTY_STRING;
26140
26141                     if (   $accumulating_text_for_block
26142                         && $levels_to_go[$i] == $leading_block_text_level )
26143                     {
26144
26145                         if ( $accumulating_text_for_block eq $block_type ) {
26146
26147                             # save any leading text before we enter this block
26148                             $block_leading_text{$type_sequence} = [
26149                                 $leading_block_text,
26150                                 $rleading_block_if_elsif_text
26151                             ];
26152                             $block_opening_line_number{$type_sequence} =
26153                               $leading_block_text_line_number;
26154                             reset_block_text_accumulator();
26155                         }
26156                         else {
26157
26158                             # shouldn't happen, but not a serious error.
26159                             # We were accumulating -csc text for block type
26160                             # $accumulating_text_for_block and unexpectedly
26161                             # encountered a '{' for block type $block_type.
26162                         }
26163                     }
26164                 }
26165             }
26166
26167             if (   $type eq 'k'
26168                 && $csc_new_statement_ok
26169                 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
26170                 && $token =~ /$closing_side_comment_list_pattern/ )
26171             {
26172                 $self->set_block_text_accumulator($i);
26173             }
26174             else {
26175
26176                 # note: ignoring type 'q' because of tricks being played
26177                 # with 'q' for hanging side comments
26178                 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
26179                     $csc_new_statement_ok =
26180                       ( $block_type || $type eq 'J' || $type eq ';' );
26181                 }
26182                 if (   $type eq ';'
26183                     && $accumulating_text_for_block
26184                     && $levels_to_go[$i] == $leading_block_text_level )
26185                 {
26186                     reset_block_text_accumulator();
26187                 }
26188                 else {
26189                     $self->accumulate_block_text($i);
26190                 }
26191             }
26192         }
26193
26194         # Treat an 'else' block specially by adding preceding 'if' and
26195         # 'elsif' text.  Otherwise, the 'end else' is not helpful,
26196         # especially for cuddled-else formatting.
26197         if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
26198             $block_leading_text =
26199               $self->make_else_csc_text( $i_terminal, $terminal_block_type,
26200                 $block_leading_text, $rblock_leading_if_elsif_text );
26201         }
26202
26203         # if this line ends in a label then remember it for the next pass
26204         $csc_last_label = EMPTY_STRING;
26205         if ( $terminal_type eq 'J' ) {
26206             $csc_last_label = $tokens_to_go[$i_terminal];
26207         }
26208
26209         return ( $terminal_type, $i_terminal, $i_block_leading_text,
26210             $block_leading_text, $block_line_count, $block_label );
26211     } ## end sub accumulate_csc_text
26212
26213     sub make_else_csc_text {
26214
26215         # create additional -csc text for an 'else' and optionally 'elsif',
26216         # depending on the value of switch
26217         #
26218         #  = 0 add 'if' text to trailing else
26219         #  = 1 same as 0 plus:
26220         #      add 'if' to 'elsif's if can fit in line length
26221         #      add last 'elsif' to trailing else if can fit in one line
26222         #  = 2 same as 1 but do not check if exceed line length
26223         #
26224         # $rif_elsif_text = a reference to a list of all previous closing
26225         # side comments created for this if block
26226         #
26227         my ( $self, $i_terminal, $block_type, $block_leading_text,
26228             $rif_elsif_text )
26229           = @_;
26230         my $csc_text = $block_leading_text;
26231
26232         if (   $block_type eq 'elsif'
26233             && $rOpts_closing_side_comment_else_flag == 0 )
26234         {
26235             return $csc_text;
26236         }
26237
26238         my $count = @{$rif_elsif_text};
26239         return $csc_text unless ($count);
26240
26241         my $if_text = '[ if' . $rif_elsif_text->[0];
26242
26243         # always show the leading 'if' text on 'else'
26244         if ( $block_type eq 'else' ) {
26245             $csc_text .= $if_text;
26246         }
26247
26248         # see if that's all
26249         if ( $rOpts_closing_side_comment_else_flag == 0 ) {
26250             return $csc_text;
26251         }
26252
26253         my $last_elsif_text = EMPTY_STRING;
26254         if ( $count > 1 ) {
26255             $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
26256             if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
26257         }
26258
26259         # tentatively append one more item
26260         my $saved_text = $csc_text;
26261         if ( $block_type eq 'else' ) {
26262             $csc_text .= $last_elsif_text;
26263         }
26264         else {
26265             $csc_text .= SPACE . $if_text;
26266         }
26267
26268         # all done if no length checks requested
26269         if ( $rOpts_closing_side_comment_else_flag == 2 ) {
26270             return $csc_text;
26271         }
26272
26273         # undo it if line length exceeded
26274         my $length =
26275           length($csc_text) +
26276           length($block_type) +
26277           length( $rOpts->{'closing-side-comment-prefix'} ) +
26278           $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
26279         if (
26280             $length > $maximum_line_length_at_level[$leading_block_text_level] )
26281         {
26282             $csc_text = $saved_text;
26283         }
26284         return $csc_text;
26285     } ## end sub make_else_csc_text
26286 } ## end closure accumulate_csc_text
26287
26288 {    ## begin closure balance_csc_text
26289
26290     # Some additional routines for handling the --closing-side-comments option
26291
26292     my %matching_char;
26293
26294     BEGIN {
26295         %matching_char = (
26296             '{' => '}',
26297             '(' => ')',
26298             '[' => ']',
26299             '}' => '{',
26300             ')' => '(',
26301             ']' => '[',
26302         );
26303     }
26304
26305     sub balance_csc_text {
26306
26307         # Append characters to balance a closing side comment so that editors
26308         # such as vim can correctly jump through code.
26309         # Simple Example:
26310         #  input  = ## end foreach my $foo ( sort { $b  ...
26311         #  output = ## end foreach my $foo ( sort { $b  ...})
26312
26313         # NOTE: This routine does not currently filter out structures within
26314         # quoted text because the bounce algorithms in text editors do not
26315         # necessarily do this either (a version of vim was checked and
26316         # did not do this).
26317
26318         # Some complex examples which will cause trouble for some editors:
26319         #  while ( $mask_string =~ /\{[^{]*?\}/g ) {
26320         #  if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
26321         #  if ( $1 eq '{' ) {
26322         # test file test1/braces.pl has many such examples.
26323
26324         my ($csc) = @_;
26325
26326         # loop to examine characters one-by-one, RIGHT to LEFT and
26327         # build a balancing ending, LEFT to RIGHT.
26328         foreach my $pos ( reverse( 0 .. length($csc) - 1 ) ) {
26329
26330             my $char = substr( $csc, $pos, 1 );
26331
26332             # ignore everything except structural characters
26333             next unless ( $matching_char{$char} );
26334
26335             # pop most recently appended character
26336             my $top = chop($csc);
26337
26338             # push it back plus the mate to the newest character
26339             # unless they balance each other.
26340             $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
26341         }
26342
26343         # return the balanced string
26344         return $csc;
26345     } ## end sub balance_csc_text
26346 } ## end closure balance_csc_text
26347
26348 sub add_closing_side_comment {
26349
26350     my ( $self, $ri_first, $ri_last ) = @_;
26351     my $rLL = $self->[_rLL_];
26352
26353     # add closing side comments after closing block braces if -csc used
26354     my ( $closing_side_comment, $cscw_block_comment );
26355
26356     #---------------------------------------------------------------
26357     # Step 1: loop through all tokens of this line to accumulate
26358     # the text needed to create the closing side comments. Also see
26359     # how the line ends.
26360     #---------------------------------------------------------------
26361
26362     my ( $terminal_type, $i_terminal, $i_block_leading_text,
26363         $block_leading_text, $block_line_count, $block_label )
26364       = $self->accumulate_csc_text();
26365
26366     #---------------------------------------------------------------
26367     # Step 2: make the closing side comment if this ends a block
26368     #---------------------------------------------------------------
26369     my $have_side_comment = $types_to_go[$max_index_to_go] eq '#';
26370
26371     # if this line might end in a block closure..
26372     if (
26373         $terminal_type eq '}'
26374
26375         # Fix 1 for c091, this is only for blocks
26376         && $block_type_to_go[$i_terminal]
26377
26378         # ..and either
26379         && (
26380
26381             # the block is long enough
26382             ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
26383
26384             # or there is an existing comment to check
26385             || (   $have_side_comment
26386                 && $rOpts->{'closing-side-comment-warnings'} )
26387         )
26388
26389         # .. and if this is one of the types of interest
26390         && $block_type_to_go[$i_terminal] =~
26391         /$closing_side_comment_list_pattern/
26392
26393         # .. but not an anonymous sub
26394         # These are not normally of interest, and their closing braces are
26395         # often followed by commas or semicolons anyway.  This also avoids
26396         # possible erratic output due to line numbering inconsistencies
26397         # in the cases where their closing braces terminate a line.
26398         && $block_type_to_go[$i_terminal] ne 'sub'
26399
26400         # ..and the corresponding opening brace must is not in this batch
26401         # (because we do not need to tag one-line blocks, although this
26402         # should also be caught with a positive -csci value)
26403         && $mate_index_to_go[$i_terminal] < 0
26404
26405         # ..and either
26406         && (
26407
26408             # this is the last token (line doesn't have a side comment)
26409             !$have_side_comment
26410
26411             # or the old side comment is a closing side comment
26412             || $tokens_to_go[$max_index_to_go] =~
26413             /$closing_side_comment_prefix_pattern/
26414         )
26415       )
26416     {
26417
26418         # then make the closing side comment text
26419         if ($block_label) { $block_label .= SPACE }
26420         my $token =
26421 "$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
26422
26423         # append any extra descriptive text collected above
26424         if ( $i_block_leading_text == $i_terminal ) {
26425             $token .= $block_leading_text;
26426         }
26427
26428         $token = balance_csc_text($token)
26429           if $rOpts->{'closing-side-comments-balanced'};
26430
26431         $token =~ s/\s*$//;    # trim any trailing whitespace
26432
26433         # handle case of existing closing side comment
26434         if ($have_side_comment) {
26435
26436             # warn if requested and tokens differ significantly
26437             if ( $rOpts->{'closing-side-comment-warnings'} ) {
26438                 my $old_csc = $tokens_to_go[$max_index_to_go];
26439                 my $new_csc = $token;
26440                 $new_csc =~ s/\s+//g;            # trim all whitespace
26441                 $old_csc =~ s/\s+//g;            # trim all whitespace
26442                 $new_csc =~ s/[\]\)\}\s]*$//;    # trim trailing structures
26443                 $old_csc =~ s/[\]\)\}\s]*$//;    # trim trailing structures
26444                 $new_csc =~ s/(\.\.\.)$//;       # trim trailing '...'
26445                 my $new_trailing_dots = $1;
26446                 $old_csc =~ s/(\.\.\.)\s*$//;    # trim trailing '...'
26447
26448                 # Patch to handle multiple closing side comments at
26449                 # else and elsif's.  These have become too complicated
26450                 # to check, so if we see an indication of
26451                 # '[ if' or '[ # elsif', then assume they were made
26452                 # by perltidy.
26453                 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
26454                     if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
26455                 }
26456                 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
26457                     if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
26458                 }
26459
26460                 # if old comment is contained in new comment,
26461                 # only compare the common part.
26462                 if ( length($new_csc) > length($old_csc) ) {
26463                     $new_csc = substr( $new_csc, 0, length($old_csc) );
26464                 }
26465
26466                 # if the new comment is shorter and has been limited,
26467                 # only compare the common part.
26468                 if ( length($new_csc) < length($old_csc)
26469                     && $new_trailing_dots )
26470                 {
26471                     $old_csc = substr( $old_csc, 0, length($new_csc) );
26472                 }
26473
26474                 # any remaining difference?
26475                 if ( $new_csc ne $old_csc ) {
26476
26477                     # just leave the old comment if we are below the threshold
26478                     # for creating side comments
26479                     if ( $block_line_count <
26480                         $rOpts->{'closing-side-comment-interval'} )
26481                     {
26482                         $token = undef;
26483                     }
26484
26485                     # otherwise we'll make a note of it
26486                     else {
26487
26488                         warning(
26489 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
26490                         );
26491
26492                         # save the old side comment in a new trailing block
26493                         # comment
26494                         my $timestamp = EMPTY_STRING;
26495                         if ( $rOpts->{'timestamp'} ) {
26496                             my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
26497                             $year  += 1900;
26498                             $month += 1;
26499                             $timestamp = "$year-$month-$day";
26500                         }
26501                         $cscw_block_comment =
26502 "## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]";
26503 ## "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
26504                     }
26505                 }
26506                 else {
26507
26508                     # No differences.. we can safely delete old comment if we
26509                     # are below the threshold
26510                     if ( $block_line_count <
26511                         $rOpts->{'closing-side-comment-interval'} )
26512                     {
26513                         # Since the line breaks have already been set, we have
26514                         # to remove the token from the _to_go array and also
26515                         # from the line range (this fixes issue c081).
26516                         # Note that we can only get here if -cscw has been set
26517                         # because otherwise the old comment is already deleted.
26518                         $token = undef;
26519                         my $ibeg = $ri_first->[-1];
26520                         my $iend = $ri_last->[-1];
26521                         if (   $iend > $ibeg
26522                             && $iend == $max_index_to_go
26523                             && $types_to_go[$max_index_to_go] eq '#' )
26524                         {
26525                             $iend--;
26526                             $max_index_to_go--;
26527                             if (   $iend > $ibeg
26528                                 && $types_to_go[$max_index_to_go] eq 'b' )
26529                             {
26530                                 $iend--;
26531                                 $max_index_to_go--;
26532                             }
26533                             $ri_last->[-1] = $iend;
26534                         }
26535                     }
26536                 }
26537             }
26538
26539             # switch to the new csc (unless we deleted it!)
26540             if ($token) {
26541
26542                 my $len_tok = length($token); # NOTE: length no longer important
26543                 my $added_len =
26544                   $len_tok - $token_lengths_to_go[$max_index_to_go];
26545
26546                 $tokens_to_go[$max_index_to_go]        = $token;
26547                 $token_lengths_to_go[$max_index_to_go] = $len_tok;
26548                 my $K = $K_to_go[$max_index_to_go];
26549                 $rLL->[$K]->[_TOKEN_]        = $token;
26550                 $rLL->[$K]->[_TOKEN_LENGTH_] = $len_tok;
26551                 $summed_lengths_to_go[ $max_index_to_go + 1 ] += $added_len;
26552             }
26553         }
26554
26555         # handle case of NO existing closing side comment
26556         else {
26557
26558             # To avoid inserting a new token in the token arrays, we
26559             # will just return the new side comment so that it can be
26560             # inserted just before it is needed in the call to the
26561             # vertical aligner.
26562             $closing_side_comment = $token;
26563         }
26564     }
26565     return ( $closing_side_comment, $cscw_block_comment );
26566 } ## end sub add_closing_side_comment
26567
26568 ############################
26569 # CODE SECTION 15: Summarize
26570 ############################
26571
26572 sub wrapup {
26573
26574     # This is the last routine called when a file is formatted.
26575     # Flush buffer and write any informative messages
26576     my $self = shift;
26577
26578     $self->flush();
26579     my $file_writer_object = $self->[_file_writer_object_];
26580     $file_writer_object->decrement_output_line_number()
26581       ;    # fix up line number since it was incremented
26582     we_are_at_the_last_line();
26583
26584     my $max_depth = $self->[_maximum_BLOCK_level_];
26585     my $at_line   = $self->[_maximum_BLOCK_level_at_line_];
26586     write_logfile_entry(
26587 "Maximum leading structural depth is $max_depth in input at line $at_line\n"
26588     );
26589
26590     my $added_semicolon_count    = $self->[_added_semicolon_count_];
26591     my $first_added_semicolon_at = $self->[_first_added_semicolon_at_];
26592     my $last_added_semicolon_at  = $self->[_last_added_semicolon_at_];
26593
26594     if ( $added_semicolon_count > 0 ) {
26595         my $first = ( $added_semicolon_count > 1 ) ? "First" : EMPTY_STRING;
26596         my $what =
26597           ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
26598         write_logfile_entry("$added_semicolon_count $what added:\n");
26599         write_logfile_entry(
26600             "  $first at input line $first_added_semicolon_at\n");
26601
26602         if ( $added_semicolon_count > 1 ) {
26603             write_logfile_entry(
26604                 "   Last at input line $last_added_semicolon_at\n");
26605         }
26606         write_logfile_entry("  (Use -nasc to prevent semicolon addition)\n");
26607         write_logfile_entry("\n");
26608     }
26609
26610     my $deleted_semicolon_count    = $self->[_deleted_semicolon_count_];
26611     my $first_deleted_semicolon_at = $self->[_first_deleted_semicolon_at_];
26612     my $last_deleted_semicolon_at  = $self->[_last_deleted_semicolon_at_];
26613     if ( $deleted_semicolon_count > 0 ) {
26614         my $first = ( $deleted_semicolon_count > 1 ) ? "First" : EMPTY_STRING;
26615         my $what =
26616           ( $deleted_semicolon_count > 1 )
26617           ? "semicolons were"
26618           : "semicolon was";
26619         write_logfile_entry(
26620             "$deleted_semicolon_count unnecessary $what deleted:\n");
26621         write_logfile_entry(
26622             "  $first at input line $first_deleted_semicolon_at\n");
26623
26624         if ( $deleted_semicolon_count > 1 ) {
26625             write_logfile_entry(
26626                 "   Last at input line $last_deleted_semicolon_at\n");
26627         }
26628         write_logfile_entry("  (Use -ndsm to prevent semicolon deletion)\n");
26629         write_logfile_entry("\n");
26630     }
26631
26632     my $embedded_tab_count    = $self->[_embedded_tab_count_];
26633     my $first_embedded_tab_at = $self->[_first_embedded_tab_at_];
26634     my $last_embedded_tab_at  = $self->[_last_embedded_tab_at_];
26635     if ( $embedded_tab_count > 0 ) {
26636         my $first = ( $embedded_tab_count > 1 ) ? "First" : EMPTY_STRING;
26637         my $what =
26638           ( $embedded_tab_count > 1 )
26639           ? "quotes or patterns"
26640           : "quote or pattern";
26641         write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
26642         write_logfile_entry(
26643 "This means the display of this script could vary with device or software\n"
26644         );
26645         write_logfile_entry("  $first at input line $first_embedded_tab_at\n");
26646
26647         if ( $embedded_tab_count > 1 ) {
26648             write_logfile_entry(
26649                 "   Last at input line $last_embedded_tab_at\n");
26650         }
26651         write_logfile_entry("\n");
26652     }
26653
26654     my $first_tabbing_disagreement = $self->[_first_tabbing_disagreement_];
26655     my $last_tabbing_disagreement  = $self->[_last_tabbing_disagreement_];
26656     my $tabbing_disagreement_count = $self->[_tabbing_disagreement_count_];
26657     my $in_tabbing_disagreement    = $self->[_in_tabbing_disagreement_];
26658
26659     if ($first_tabbing_disagreement) {
26660         write_logfile_entry(
26661 "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
26662         );
26663     }
26664
26665     my $first_btd = $self->[_first_brace_tabbing_disagreement_];
26666     if ($first_btd) {
26667         my $msg =
26668 "First closing brace indentation disagreement started at input line $first_btd\n";
26669         write_logfile_entry($msg);
26670
26671         # leave a hint in the .ERR file if there was a brace error
26672         if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
26673     }
26674
26675     my $in_btd = $self->[_in_brace_tabbing_disagreement_];
26676     if ($in_btd) {
26677         my $msg =
26678 "Ending with brace indentation disagreement which started at input line $in_btd\n";
26679         write_logfile_entry($msg);
26680
26681         # leave a hint in the .ERR file if there was a brace error
26682         if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
26683     }
26684
26685     if ($in_tabbing_disagreement) {
26686         my $msg =
26687 "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n";
26688         write_logfile_entry($msg);
26689     }
26690     else {
26691
26692         if ($last_tabbing_disagreement) {
26693
26694             write_logfile_entry(
26695 "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
26696             );
26697         }
26698         else {
26699             write_logfile_entry("No indentation disagreement seen\n");
26700         }
26701     }
26702
26703     if ($first_tabbing_disagreement) {
26704         write_logfile_entry(
26705 "Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
26706         );
26707     }
26708     write_logfile_entry("\n");
26709
26710     my $vao = $self->[_vertical_aligner_object_];
26711     $vao->report_anything_unusual();
26712
26713     $file_writer_object->report_line_length_errors();
26714
26715     $self->[_converged_] = $file_writer_object->get_convergence_check()
26716       || $rOpts->{'indent-only'};
26717
26718     return;
26719 } ## end sub wrapup
26720
26721 } ## end package Perl::Tidy::Formatter
26722 1;